#' @title Simulated example of a 2 way interaction GxE model.
#' @description Simulated example of a 2 way interaction GxE model (where G and E are latent variables). 
#' \deqn{g_j \sim Binomial(n=1,p=.30)}
#' \deqn{j = 1, 2, 3, 4}
#' \deqn{e_l \sim Normal(\mu=0,\sigma=1.5)}
#' \deqn{l = 1, 2, 3}
#' \deqn{g = .2g_1 + .15g_2 - .3g_3 + .1g_4 + .05g_1g_3 + .2g_2g_3}
#' \deqn{e = -.45e_1 + .35e_2 + .2e_3}
#' \deqn{\mu = -1 + 2g + 3e + 4ge}
#' \tabular{cc}{
#' \eqn{y \sim Normal(\mu=\mu,\sigma=\code{sigma})} if \code{logit}=FALSE \cr
#' \eqn{y \sim Binomial(n=1,p=logit^{-1}(\mu))} if \code{logit}=TRUE
#' }
#' @param N Sample size.
#' @param sigma Standard deviation of the gaussian noise (if \code{logit}=FALSE).
#' @param logit If TRUE, the outcome is transformed to binary with a logit link.
#' @param seed RNG seed.
#' @return Returns a list containing, in the following order: data.frame with the observed outcome (with noise) and the true outcome (without noise), data.frame of the genetic variants (G), data.frame of the environments (E), vector of the true genetic coefficients, vector of the true environmental coefficients, vector of the true main model coefficients
#' @examples
#'	example_2way(5,1,logit=FALSE)
#'	example_2way(5,0,logit=TRUE)
#' @export
"example_2way"

#' @title Simulated example of a 3 way interaction GxExz model
#' @description Simulated example of a 3 way interaction GxExz model (where G and E are latent variables). 
#' \deqn{g_j \sim Binomial(n=1,p=.30)}
#' \deqn{j = 1, 2, 3, 4}
#' \deqn{e_l \sim Normal(\mu=0,\sigma=1.5)}
#' \deqn{l = 1, 2, 3}
#' \deqn{z \sim Normal(\mu=3,\sigma=1)}
#' \deqn{g = .2g_1 + .15g_2 - .3g_3 + .1g_4 + .05g_1g_3 + .2g_2g_3}
#' \deqn{e = -.45e_1 + .35e_2 + .2e_3}
#' \deqn{\mu = -2 + 2g + 3e + z + 5ge - 1.5ez + 2gz + 2gez}
#' \tabular{cc}{
#' \eqn{y \sim Normal(\mu=\mu,\sigma=\code{sigma})} if \code{logit}=FALSE \cr
#' \eqn{y \sim Binomial(n=1,p=logit^{-1}(\mu))} if \code{logit}=TRUE
#' }
#' @param N Sample size.
#' @param sigma Standard deviation of the gaussian noise (if \code{logit}=FALSE).
#' @param logit If TRUE, the outcome is transformed to binary with a logit link.
#' @param seed RNG seed.
#' @return Returns a list containing, in the following order: data.frame with the observed outcome (with noise), the true outcome (without noise) and \eqn{z}, data.frame of the genetic variants (G), data.frame of the environments (E), vector of the true genetic coefficients, vector of the true environmental coefficients, vector of the true main model coefficients
#' @examples
#'	example_3way(5,2.5,logit=FALSE)
#'	example_3way(5,0,logit=TRUE)
#' @export
"example_3way"

#' @title Simulated example of a 3 way interaction GxExZ model
#' @description Simulated example of a 3 way interaction GxExZ model (where G, E and Z are latent variables). 
#' \deqn{g_j \sim Binomial(n=1,p=.30)}
#' \deqn{j = 1, 2, 3, 4}
#' \deqn{e_k \sim Normal(\mu=0,\sigma=1.5)}
#' \deqn{k = 1, 2, 3}
#' \deqn{z_l \sim Normal(\mu=3,\sigma=1)}
#' \deqn{l = 1, 2, 3}
#' \deqn{g = .2g_1 + .15g_2 - .3g_3 + .1g_4 + .05g_1g_3 + .2g_2g_3}
#' \deqn{e = -.45e_1 + .35e_2 + .2e_3}
#' \deqn{z = .15z_1 + .60z_2 + .25z_3}
#' \deqn{\mu = -2 + 2g + 3e + z + 5ge - 1.5ez + 2gz + 2gez}
#' \tabular{cc}{
#' \eqn{y \sim Normal(\mu=\mu,\sigma=\code{sigma})} if \code{logit}=FALSE \cr
#' \eqn{y \sim Binomial(n=1,p=logit^{-1}(\mu))} if \code{logit}=TRUE
#' }
#' @param N Sample size.
#' @param sigma Standard deviation of the gaussian noise (if \code{logit}=FALSE).
#' @param logit If TRUE, the outcome is transformed to binary with a logit link.
#' @param seed RNG seed.
#' @return Returns a list containing, in the following order: data.frame with the observed outcome (with noise) and the true outcome (without noise), list containing the data.frame of the genetic variants (G), the data.frame of the \eqn{e} environments (E) and the data.frame of the \eqn{z} environments (Z), vector of the true genetic coefficients, vector of the true \eqn{e} environmental coefficients, vector of the true \eqn{z} environmental coefficients, vector of the true main model coefficients
#' @examples
#'	example_3way_3latent(5,1,logit=FALSE)
#'	example_3way_3latent(5,0,logit=TRUE)
#' @export
"example_3way_3latent"

#' @title Longitudinal folds
#' @description Function to create folds adequately for longitudinal datasets by forcing every observation with the same id to be in the same fold. Can be used with LEGIT_cv to make sure that the cross-validation folds are appropriate when using longitudinal data.
#' @param cv_iter Number of cross-validation iterations (Default = 1).
#' @param cv_folds Number of cross-validation folds (Default = 10).
#' @param id Factor vector containing the id number of each observation.
#' @param formula Optional Model formula. If data and formula are provided, only the non-missing observations will be used when creating the folds (Put "formula" here if you have missing data).
#' @param data Optional data.frame used for the formula. If data and formula are provided, only the non-missing observations will be used when creating the folds (Put "data" here if you have missing data).
#' @param data_needed Optional data.frame with variables that have to be included (Put "cbind(genes,env)" or "latent_var" here if you have missing data).
#' @return Returns a list of vectors containing the fold number for each observation
#' @examples
#'	train = example_2way(500, 1, seed=777)
#'	# Assuming it's longitudinal with 4 timepoints, even though it's not
#'	id = factor(rep(1:125,each=4))
#'	fit_cv = LEGIT_cv(train$data, train$G, train$E, y ~ G*E, folds=longitudinal_folds(1,10, id))
#' @export
"longitudinal_folds"

#' @title Latent Environmental & Genetic InTeraction (LEGIT) model
#' @description Constructs a generalized linear model (glm) with a weighted latent environmental score and weighted latent genetic score using alternating optimization.
#' @param data data.frame of the dataset to be used. 
#' @param genes data.frame of the variables inside the genetic score \emph{G} (can be any sort of variable, doesn't even have to be genetic).
#' @param env data.frame of the variables inside the environmental score \emph{E} (can be any sort of variable, doesn't even have to be environmental).
#' @param formula Model formula. Use \emph{E} for the environmental score and \emph{G} for the genetic score. Do not manually code interactions, write them in the formula instead (ex: G*E*z or G:E:z).
#' @param start_genes Optional starting points for genetic score (must be same length as the number of columns of \code{genes}).
#' @param start_env Optional starting points for environmental score (must be same length as the number of columns of \code{env}).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param print If FALSE, nothing except warnings will be printed. (Default = TRUE).
#' @return Returns an object of the class "LEGIT" which is a list containing, in the following order: a glm fit of the main model, a glm fit of the genetic score, a glm fit of the environmental score, a list of the true model parameters (AIC, BIC, rank, df.residual, null.deviance) for which the individual model parts (main, genetic, environmental) don't estimate properly.
#' @examples
#'	train = example_2way(500, 1, seed=777)
#'	fit_best = LEGIT(train$data, train$G, train$E, y ~ G*E, train$coef_G, train$coef_E)
#'	fit_default = LEGIT(train$data, train$G, train$E, y ~ G*E)
#'	summary(fit_default)
#'	summary(fit_best)
#'	train = example_3way(500, 2.5, seed=777)
#'	fit_best = LEGIT(train$data, train$G, train$E, y ~ G*E*z, train$coef_G, train$coef_E)
#'	fit_default = LEGIT(train$data, train$G, train$E, y ~ G*E*z)
#'	summary(fit_default)
#'	summary(fit_best)
#' @import formula.tools stats
#' @references Alexia Jolicoeur-Martineau, Ashley Wazana, Eszter Szekely, Meir Steiner, Alison S. Fleming, James L. Kennedy, Michael J. Meaney, Celia M.T. Greenwood and the MAVAN team. \emph{Alternating optimization for GxE modelling with weighted genetic and environmental scores: examples from the MAVAN study} (2017). arXiv:1703.08111.
#' @export
"LEGIT"

#' @title Independent Multiple Latent Environmental & Genetic InTeraction (IMLEGIT) model
#' @description Constructs a generalized linear model (glm) with latent variables using alternating optimization. This is an extension of the LEGIT model to accommodate more than 2 latent variables.
#' @param data data.frame of the dataset to be used. 
#' @param latent_var list of data.frame. The elements of the list are the datasets used to construct each latent variable. For interpretability and proper convergence, not using the same variable in more than one latent variable is highly recommended. It is recommended to set names to the list elements to prevent confusion because otherwise the latent variables will be named L1, L2, ... (See examples below for more details)
#' @param formula Model formula. The names of \code{latent_var} can be used in the formula to represent the latent variables. If names(\code{latent_var}) is NULL, then L1, L2, ... can be used in formula to represent the latent variables. Do not manually code interactions, write them in the formula instead (ex: G*E1*E2 or G:E1:E2).
#' @param start_latent_var Optional list of starting points for each latent variable (The list must have the same length as the number of latent variables and each element of the list must have the same length as the number of variables of the corresponding latent variable).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param print If FALSE, nothing except warnings will be printed. (Default = TRUE).
#' @return Returns an object of the class "IMLEGIT" which is a list containing, in the following order: a glm fit of the main model, a list of the glm fits of the latent variables and a list of the true model parameters (AIC, BIC, rank, df.residual, null.deviance) for which the individual model parts (main and latent variables) don't estimate properly.
#' @examples
#'	train = example_2way(500, 1, seed=777)
#'	fit_best = IMLEGIT(train$data, list(G=train$G, E=train$E), y ~ G*E, 
#'	list(train$coef_G, train$coef_E))
#'	fit_default = IMLEGIT(train$data, list(G=train$G, E=train$E), y ~ G*E)
#'	summary(fit_default)
#'	summary(fit_best)
#'	train = example_3way_3latent(500, 1, seed=777)
#'	fit_best = IMLEGIT(train$data, train$latent_var, y ~ G*E*Z, 
#'	list(train$coef_G, train$coef_E, train$coef_Z))
#'	fit_default = IMLEGIT(train$data, train$latent_var, y ~ G*E*Z)
#'	summary(fit_default)
#'	summary(fit_best)
#' @import formula.tools stats
#' @references Alexia Jolicoeur-Martineau, Ashley Wazana, Eszter Szekely, Meir Steiner, Alison S. Fleming, James L. Kennedy, Michael J. Meaney, Celia M.T. Greenwood and the MAVAN team. \emph{Alternating optimization for GxE modelling with weighted genetic and environmental scores: examples from the MAVAN study} (2017). arXiv:1703.08111.
#' @export
"IMLEGIT"

#' @title Predictions of LEGIT fits
#' @description Predictions of LEGIT fits.
#' @param object An object of class "LEGIT", usually, a result of a call to LEGIT.
#' @param data data.frame of the dataset to be used.
#' @param genes data.frame of the variables inside the genetic score \emph{G} (can be any sort of variable, doesn't even have to be genetic).
#' @param env data.frame of the variables inside the environmental score \emph{E} (can be any sort of variable, doesn't even have to be environmental).
#' @param ... Further arguments passed to or from other methods.
#' @return Returns a vector with the predicted values.
#' @examples
#'	train = example_2way(250, 1, seed=777)
#'	test = example_2way(100, 1, seed=666)
#'	fit = LEGIT(train$data, train$G, train$E, y ~ G*E)
#'	ssres = sum((test$data$y - predict(fit, test$data, test$G, test$E))^2)
#'	sstotal = sum((test$data$y - mean(test$data$y))^2)
#'	R2 = 1 - ssres/sstotal
#' @export
"predict.LEGIT"

#' @title Predictions of IMLEGIT fits
#' @description Predictions of IMLEGIT fits.
#' @param object An object of class "IMLEGIT", usually, a result of a call to IMLEGIT.
#' @param data data.frame of the dataset to be used.
#' @param latent_var list of data.frame. The elements of the list are the datasets used to construct each latent variable. For interpretability and proper convergence, not using the same variable in more than one latent variable is highly recommended. It is recommended to set names to the list elements to prevent confusion because otherwise the latent variables will be named L1, L2, ...
#' @param ... Further arguments passed to or from other methods.
#' @return Returns a vector with the predicted values.
#' @examples
#'	train = example_2way(250, 1, seed=777)
#'	test = example_2way(100, 1, seed=666)
#'	fit = IMLEGIT(train$data, list(G=train$G, E=train$E), y ~ G*E)
#'	ssres = sum((test$data$y - predict(fit, test$data, list(G=test$G, E=test$E)))^2)
#'	sstotal = sum((test$data$y - mean(test$data$y))^2)
#'	R2 = 1 - ssres/sstotal
#'	R2
#' @export
"predict.IMLEGIT"

#' @title Summarizing LEGIT fits
#' @description Shows the summary for all parts (main, genetic, environmental) of the LEGIT model.
#' @param object An object of class "LEGIT", usually, a result of a call to LEGIT.
#' @param ... Further arguments passed to or from other methods.
#' @return Returns a list of objects of class "summary.glm" containing the summary of each part (main, genetic, environmental) of the model.
#' @examples
#' 	train = example_2way(250, 1, seed=777)
#'	fit_default = LEGIT(train$data, train$G, train$E, y ~ G*E)
#'	summary(fit_default)
#' @export
"summary.LEGIT"

#' @title Summarizing IMLEGIT fits
#' @description Shows the summary for all parts (main and latent variables) of the IMLEGIT model.
#' @param object An object of class "IMLEGIT", usually, a result of a call to IMLEGIT.
#' @param ... Further arguments passed to or from other methods.
#' @return Returns a list of objects of class "summary.glm" containing the summary of each part (main and latent variables) of the model.
#' @examples
#' 	train = example_2way(250, 1, seed=777)
#'	fit_default = IMLEGIT(train$data, list(G=train$G, E=train$E), y ~ G*E)
#'	summary(fit_default)
#' @export
"summary.IMLEGIT"

#' @title Cross-validation for the LEGIT model
#' @description Uses cross-validation on the LEGIT model. Note that this is not a very fast implementation since it was written in R.
#' @param data data.frame of the dataset to be used.
#' @param genes data.frame of the variables inside the genetic score \emph{G} (can be any sort of variable, doesn't even have to be genetic).
#' @param env data.frame of the variables inside the environmental score \emph{E} (can be any sort of variable, doesn't even have to be environmental).
#' @param formula Model formula. Use \emph{E} for the environmental score and \emph{G} for the genetic score. Do not manually code interactions, write them in the formula instead (ex: G*E*z or G:E:z).
#' @param cv_iter Number of cross-validation iterations (Default = 5).
#' @param cv_folds Number of cross-validation folds (Default = 10). Using \code{cv_folds=NROW(data)} will lead to leave-one-out cross-validation.
#' @param folds Optional list of vectors containing the fold number for each observation. Bypass cv_iter and cv_folds. Setting your own folds could be important for certain data types like time series or longitudinal data.
#' @param classification Set to TRUE if you are doing classification (binary outcome).
#' @param start_genes Optional starting points for genetic score (must be same length as the number of columns of \code{genes}).
#' @param start_env Optional starting points for environmental score (must be same length as the number of columns of \code{env}).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param seed Seed for cross-validation folds.
#' @param Huber_p Parameter controlling the Huber cross-validation error (Default = 1).
#' @param id Optional id of observations, can be a vector or data.frame (only used when returning list of possible outliers).
#' @return If \code{classification} = FALSE, returns a list containing, in the following order: a vector of the cross-validated \eqn{R^2} at each iteration, a vector of the Huber cross-validation error at each iteration, a vector of the L1-norm cross-validation error at each iteration, a matrix of the possible outliers (standardized residuals > 2.5 or < -2.5) and their corresponding standardized residuals and standardized pearson residuals. If \code{classification} = TRUE, returns a list containing, in the following order: a vector of the cross-validated \eqn{R^2} at each iteration, a vector of the Huber cross-validation error at each iteration, a vector of the L1-norm cross-validation error at each iteration, a vector of the AUC at each iteration, a matrix of the best choice of threshold (based on Youden index) and the corresponding specificity and sensitivity at each iteration, and a list of objects of class "roc" (to be able to make roc curve plots) at each iteration. The Huber and L1-norm cross-validation errors are alternatives to the usual cross-validation L2-norm error (which the \eqn{R^2} is based on) that are more resistant to outliers, the lower the values the better.
#' @examples
#'	\dontrun{
#'	train = example_3way(250, 2.5, seed=777)
#'	# Cross-validation 4 times with 5 Folds
#'	cv_5folds = LEGIT_cv(train$data, train$G, train$E, y ~ G*E*z, cv_iter=4, cv_folds=5)
#'	cv_5folds
#'	# Leave-one-out cross-validation (Note: very slow)
#'	cv_loo = LEGIT_cv(train$data, train$G, train$E, y ~ G*E*z, cv_iter=1, cv_folds=250)
#'	cv_loo
#'	# Cross-validation 4 times with 5 Folds (binary outcome)
#'	train_bin = example_2way(500, 2.5, logit=TRUE, seed=777)
#'	cv_5folds_bin = LEGIT_cv(train_bin$data, train_bin$G, train_bin$E, y ~ G*E, 
#'	cv_iter=4, cv_folds=5, classification=TRUE, family=binomial)
#'	cv_5folds_bin
#'	par(mfrow=c(2,2))
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[1]])
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[2]])
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[3]])
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[4]])
#'	}
#' @references Denis Heng-Yan Leung. \emph{Cross-validation in nonparametric regression with outliers.} Annals of Statistics (2005): 2291-2310.
#' @export
"LEGIT_cv"

#' @title Cross-validation for the IMLEGIT model
#' @description Uses cross-validation on the IMLEGIT model. Note that this is not a very fast implementation since it was written in R.
#' @param data data.frame of the dataset to be used.
#' @param latent_var list of data.frame. The elements of the list are the datasets used to construct each latent variable. For interpretability and proper convergence, not using the same variable in more than one latent variable is highly recommended. It is recommended to set names to the list elements to prevent confusion because otherwise the latent variables will be named L1, L2, ...
#' @param formula Model formula. The names of \code{latent_var} can be used in the formula to represent the latent variables. If names(\code{latent_var}) is NULL, then L1, L2, ... can be used in formula to represent the latent variables. Do not manually code interactions, write them in the formula instead (ex: G*E1*E2 or G:E1:E2).
#' @param cv_iter Number of cross-validation iterations (Default = 5).
#' @param cv_folds Number of cross-validation folds (Default = 10). Using \code{cv_folds=NROW(data)} will lead to leave-one-out cross-validation.
#' @param folds Optional list of vectors containing the fold number for each observation. Bypass cv_iter and cv_folds. Setting your own folds could be important for certain data types like time series or longitudinal data.
#' @param classification Set to TRUE if you are doing classification (binary outcome).
#' @param start_latent_var Optional list of starting points for each latent variable (The list must have the same length as the number of latent variables and each element of the list must have the same length as the number of variables of the corresponding latent variable).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param seed Seed for cross-validation folds.
#' @param Huber_p Parameter controlling the Huber cross-validation error (Default = 1).
#' @param id Optional id of observations, can be a vector or data.frame (only used when returning list of possible outliers).
#' @return If \code{classification} = FALSE, returns a list containing, in the following order: a vector of the cross-validated \eqn{R^2} at each iteration, a vector of the Huber cross-validation error at each iteration, a vector of the L1-norm cross-validation error at each iteration, a matrix of the possible outliers (standardized residuals > 2.5 or < -2.5) and their corresponding standardized residuals and standardized pearson residuals. If \code{classification} = TRUE, returns a list containing, in the following order: a vector of the cross-validated \eqn{R^2} at each iteration, a vector of the Huber cross-validation error at each iteration, a vector of the L1-norm cross-validation error at each iteration, a vector of the AUC at each iteration, a matrix of the best choice of threshold (based on Youden index) and the corresponding specificity and sensitivity at each iteration, and a list of objects of class "roc" (to be able to make roc curve plots) at each iteration. The Huber and L1-norm cross-validation errors are alternatives to the usual cross-validation L2-norm error (which the \eqn{R^2} is based on) that are more resistant to outliers, the lower the values the better.
#' @examples
#'	\dontrun{
#'	train = example_3way_3latent(250, 1, seed=777)
#'	# Cross-validation 4 times with 5 Folds
#'	cv_5folds = IMLEGIT_cv(train$data, train$latent_var, y ~ G*E*Z, cv_iter=4, cv_folds=5)
#'	cv_5folds
#'	# Leave-one-out cross-validation (Note: very slow)
#'	cv_loo = IMLEGIT_cv(train$data, train$latent_var, y ~ G*E*Z, cv_iter=1, cv_folds=250)
#'	cv_loo
#'	# Cross-validation 4 times with 5 Folds (binary outcome)
#'	train_bin = example_2way(500, 2.5, logit=TRUE, seed=777)
#'	cv_5folds_bin = IMLEGIT_cv(train_bin$data, list(G=train_bin$G, E=train_bin$E), y ~ G*E, 
#'	cv_iter=4, cv_folds=5, classification=TRUE, family=binomial)
#'	cv_5folds_bin
#'	par(mfrow=c(2,2))
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[1]])
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[2]])
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[3]])
#'	pROC::plot.roc(cv_5folds_bin$roc_curve[[4]])
#'	}
#' @references Denis Heng-Yan Leung. \emph{Cross-validation in nonparametric regression with outliers.} Annals of Statistics (2005): 2291-2310.
#' @export
"IMLEGIT_cv"

#' Internal function that does the forward step for the stepwise function.
#' @param empty_start_dataset If TRUE, the initial dataset is empty.
#' @param fit Current best fit.
#' @param ... Same parameters as in the stepwise function.
#' @return Returns fit, start_genes, start_env and genes_current, genes_toadd if search="genes" or env_current and env_toadd if search="env".
#' @keywords internal
"forward_step"

#' Internal function that does the forward step for the stepwise IM function.
#' @param empty_start_dataset If TRUE, the initial dataset is empty.
#' @param fit Current best fit.
#' @param ... Same parameters as in the stepwise function.
#' @return Returns fit, start_latent_var, latent_var_current and latent_var_toadd.
#' @keywords internal
"forward_step_IM"

#' Internal function that does the backward step for the stepwise function.
#' @param empty_start_dataset If TRUE, the initial dataset is empty.
#' @param fit Current best fit.
#' @param ... Same parameters as in the stepwise function.
#' @return Returns fit, start_genes, start_env and genes_current, genes_dropped if search="genes" or env_current and env_dropped if search="env".
#' @keywords internal
"backward_step"

#' Internal function that does the backward step for the stepwise IM function.
#' @param empty_start_dataset If TRUE, the initial dataset is empty.
#' @param fit Current best fit.
#' @param ... Same parameters as in the stepwise function.
#' @return Returns fit, start_latent_var, latent_var_current and latent_var_dropped.
#' @keywords internal
"backward_step_IM"

#' @title Stepwise search for the best subset of genetic variants or environments with the LEGIT model
#' @description Adds the best variable or drops the worst variable one at a time in the genetic (if \code{search="genes"}) or environmental score (if \code{search="env"}). You can select the desired search criterion (AIC, BIC, cross-validation error, cross-validation AUC) to determine which variable is the best/worst and should be added/dropped. If using cross-validation (\code{search_criterion="cv"} or \code{search_criterion="cv_AUC"}), to prevent cross-validating with each variable (extremely slow), we recommend setting a p-value threshold (\code{forward_exclude_p_bigger} or \code{backward_exclude_p_smaller}) and forcing the algorithm not to look at models with bigger AIC (\code{exclude_worse_AIC=TRUE}).
#' @param data data.frame of the dataset to be used.
#' @param formula Model formula. Use \emph{E} for the environmental score and \emph{G} for the genetic score. Do not manually code interactions, write them in the formula instead (ex: G*E*z or G:E:z).
#' @param interactive_mode If TRUE, uses interactive mode. In interactive mode, at each iteration, the user is shown the AIC, BIC, p-value and also the cross-validation \eqn{R^2} if \code{search_criterion="cv"} and the cross-validation AUC if \code{search_criterion="cv_AUC"} for the best 5 variables. The user must then enter a number between 1 and 5 to select the variable to be added, entering anything else will stop the search.
#' @param genes_original data.frame of the variables inside the genetic score \emph{G} (can be any sort of variable, doesn't even have to be genetic).
#' @param env_original data.frame of the variables inside the environmental score \emph{E} (can be any sort of variable, doesn't even have to be environmental).
#' @param genes_extra data.frame of the additional variables to try including inside the genetic score \emph{G} (can be any sort of variable, doesn't even have to be genetic). Set to NULL if using a backward search.
#' @param env_extra data.frame of the additional variables to try including inside the environmental score \emph{E} (can be any sort of variable, doesn't even have to be environmental). Set to NULL if using a backward search.
#' @param search_type If \code{search_type="forward"}, uses a forward search. If \code{search_type="backward"}, uses backward search. If \code{search_type="bidirectional-forward"}, uses bidirectional search (that starts as a forward search). If \code{search_type="bidirectional-backward"}, uses bidirectional search (that starts as a backward search).
#' @param search If \code{search="genes"}, uses a stepwise search for the genetic score variables. If \code{search="env"}, uses a stepwise search for the environmental score variables. If \code{search="both"}, uses a stepwise search for both the gene and environmental score variables (Default = "both").
#' @param search_criterion Criterion used to determine which variable is the best to add or worst to drop. If \code{search_criterion="AIC"}, uses the AIC, if \code{search_criterion="BIC"}, uses the BIC, if \code{search_criterion="cv"}, uses the cross-validation error, if \cr \code{search_criterion="cv_AUC"}, uses the cross-validated AUC, if \code{search_criterion="cv_Huber"}, uses the Huber cross-validation error, if \code{search_criterion="cv_L1"}, uses the L1-norm cross-validation error (Default = "AIC"). The Huber and L1-norm cross-validation errors are alternatives to the usual cross-validation L2-norm error (which the \eqn{R^2} is based on) that are more resistant to outliers, the lower the values the better.
#' @param forward_exclude_p_bigger If p-value > \code{forward_exclude_p_bigger}, we do not consider the variable for inclusion in the forward steps (Default = .20).
#' @param backward_exclude_p_smaller If p-value < \code{backward_exclude_p_smaller}, we do not consider the variable for removal in the backward steps (Default = .01).
#' @param exclude_worse_AIC If AIC with variable > AIC without variable, we ignore the variable (Default = TRUE).
#' @param max_steps Maximum number of steps taken (Default = 50).
#' @param cv_iter Number of cross-validation iterations (Default = 5).
#' @param cv_folds Number of cross-validation folds (Default = 10). Using \code{cv_folds=NROW(data)} will lead to leave-one-out cross-validation.
#' @param folds Optional list of vectors containing the fold number for each observation. Bypass cv_iter and cv_folds. Setting your own folds could be important for certain data types like time series or longitudinal data.
#' @param classification Set to TRUE if you are doing classification (binary outcome).
#' @param start_genes Optional starting points for genetic score (must be same length as the number of columns of \code{genes}).
#' @param start_env Optional starting points for environmental score (must be same length as the number of columns of \code{env}).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param seed Seed for cross-validation folds.
#' @param print If TRUE, print all the steps and notes/warnings. Highly recommended unless you are batch running multiple stepwise searches. (Default=TRUE).
#' @param Huber_p Parameter controlling the Huber cross-validation error (Default = 1).
#' @param remove_miss If TRUE, remove missing data completely, otherwise missing data is only removed when adding or dropping a variable (Default = FALSE).
#' @return Returns an object of the class "LEGIT" which is a list containing, in the following order: a glm fit of the main model, a glm fit of the genetic score, a glm fit of the environmental score, a list of the true model parameters (AIC, BIC, rank, df.residual, null.deviance) for which the individual model parts (main, genetic, environmental) don't estimate properly.
#' @examples
#'	\dontrun{
#'	## Continuous example
#'	train = example_3way(250, 2.5, seed=777)
#'	# Forward search for genes based on BIC (in interactive mode)
#'	forward_genes_BIC = stepwise_search(train$data, genes_extra=train$G, env_original=train$E,
#'	formula=y ~ E*G*z,search_type="forward", search="genes", search_criterion="BIC",
#'	interactive_mode=TRUE)
#'	# Bidirectional-backward search for environments based on cross-validation error
#'	bidir_backward_env_cv = stepwise_search(train$data, genes_original=train$G, env_original=train$E,
#'	formula=y ~ E*G*z,search_type="bidirectional-backward", search="env", search_criterion="cv")
#'	## Binary example
#'	train_bin = example_2way(500, 2.5, logit=TRUE, seed=777)
#'	# Forward search for genes based on cross-validated AUC (in interactive mode)
#'	forward_genes_AUC = stepwise_search(train_bin$data, genes_extra=train_bin$G, 
#'	env_original=train_bin$E, formula=y ~ E*G,search_type="forward", search="genes", 
#'	search_criterion="cv_AUC", classification=TRUE, family=binomial, interactive_mode=TRUE)
#'	# Bidirectional-forward search for genes based on AIC
#'	bidir_forward_genes_AIC = stepwise_search(train_bin$data, genes_extra=train_bin$G, 
#'	env_original=train_bin$E, formula=y ~ E*G,search_type="bidirectional-forward", search="genes", 
#'	search_criterion="AIC", classification=TRUE, family=binomial)
#'	}
#' @export
"stepwise_search"

#' @title Stepwise search for the best subset of elements in the latent variables with the IMLEGIT model
#' @description Adds the best variable or drops the worst variable one at a time in the latent variables. You can select the desired search criterion (AIC, BIC, cross-validation error, cross-validation AUC) to determine which variable is the best/worst and should be added/dropped. If using cross-validation (\code{search_criterion="cv"} or \code{search_criterion="cv_AUC"}), to prevent cross-validating with each variable (extremely slow), we recommend setting a p-value threshold (\code{p_threshold}) and forcing the algorithm not to look at models with bigger AIC (\code{exclude_worse_AIC=TRUE}).
#' @param data data.frame of the dataset to be used.
#' @param formula Model formula. Use \emph{E} for the environmental score and \emph{G} for the genetic score. Do not manually code interactions, write them in the formula instead (ex: G*E*z or G:E:z).
#' @param interactive_mode If TRUE, uses interactive mode. In interactive mode, at each iteration, the user is shown the AIC, BIC, p-value and also the cross-validation \eqn{R^2} if \code{search_criterion="cv"} and the cross-validation AUC if \code{search_criterion="cv_AUC"} for the best 5 variables. The user must then enter a number between 1 and 5 to select the variable to be added, entering anything else will stop the search.
#' @param latent_var_original list of data.frame. The elements of the list are the datasets used to construct each latent variable. For interpretability and proper convergence, not using the same variable in more than one latent variable is highly recommended. It is recommended to set names to the list elements to prevent confusion because otherwise the latent variables will be named L1, L2, ...
#' @param latent_var_extra list of data.frame (with the same structure as latent_var_original) containing the additional elements to try including inside the latent variables. Set to NULL if using a backward search.
#' @param search_type If \code{search_type="forward"}, uses a forward search. If \code{search_type="backward"}, uses backward search. If \code{search_type="bidirectional-forward"}, uses bidirectional search (that starts as a forward search). If \code{search_type="bidirectional-backward"}, uses bidirectional search (that starts as a backward search).
#' @param search If \code{search=0}, uses a stepwise search for all latent variables. Otherwise, if search = i, uses a stepwise search on the i-th latent variable (Default = 0).
#' @param search_criterion Criterion used to determine which variable is the best to add or worst to drop. If \code{search_criterion="AIC"}, uses the AIC, if \code{search_criterion="BIC"}, uses the BIC, if \code{search_criterion="cv"}, uses the cross-validation error, if \cr \code{search_criterion="cv_AUC"}, uses the cross-validated AUC, if \code{search_criterion="cv_Huber"}, uses the Huber cross-validation error, if \code{search_criterion="cv_L1"}, uses the L1-norm cross-validation error (Default = "AIC"). The Huber and L1-norm cross-validation errors are alternatives to the usual cross-validation L2-norm error (which the \eqn{R^2} is based on) that are more resistant to outliers, the lower the values the better.
#' @param forward_exclude_p_bigger If p-value > \code{forward_exclude_p_bigger}, we do not consider the variable for inclusion in the forward steps (Default = .20).
#' @param backward_exclude_p_smaller If p-value < \code{backward_exclude_p_smaller}, we do not consider the variable for removal in the backward steps (Default = .01).
#' @param exclude_worse_AIC If AIC with variable > AIC without variable, we ignore the variable (Default = TRUE).
#' @param max_steps Maximum number of steps taken (Default = 50).
#' @param cv_iter Number of cross-validation iterations (Default = 5).
#' @param cv_folds Number of cross-validation folds (Default = 10). Using \code{cv_folds=NROW(data)} will lead to leave-one-out cross-validation.
#' @param folds Optional list of vectors containing the fold number for each observation. Bypass cv_iter and cv_folds. Setting your own folds could be important for certain data types like time series or longitudinal data.
#' @param classification Set to TRUE if you are doing classification (binary outcome).
#' @param start_latent_var Optional list of starting points for each latent variable (The list must have the same length as the number of latent variables and each element of the list must have the same length as the number of variables of the corresponding latent variable).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param seed Seed for cross-validation folds.
#' @param print If TRUE, print all the steps and notes/warnings. Highly recommended unless you are batch running multiple stepwise searches. (Default=TRUE).
#' @param Huber_p Parameter controlling the Huber cross-validation error (Default =1).
#' @param remove_miss If TRUE, remove missing data completely, otherwise missing data is only removed when adding or dropping a variable (Default = FALSE).
#' @return Returns an object of the class "IMLEGIT" which is a list containing, in the following order: a glm fit of the main model, a list of the glm fits of the latent variables and a list of the true model parameters (AIC, BIC, rank, df.residual, null.deviance) for which the individual model parts (main, genetic, environmental) don't estimate properly.
#' @examples
#'	\dontrun{
#'	## Example
#'	train = example_3way_3latent(250, 1, seed=777)
#'	# Forward search for genes based on BIC (in interactive mode)
#'	forward_genes_BIC = stepwise_search_IM(train$data, 
#'	latent_var_original=list(G=NULL, E=train$latent_var$E, Z=train$latent_var$Z),
#'	latent_var_extra=list(G=train$latent_var$G,E=NULL,Z=NULL), 
#'	formula=y ~ E*G*Z,search_type="forward", search=1, search_criterion="BIC",
#'	interactive_mode=TRUE)
#'	# Bidirectional-backward search for everything based on AIC
#'	bidir_backward_AIC = stepwise_search_IM(train$data, latent_var_extra=NULL, 
#'	latent_var_original=train$latent_var,
#'	formula=y ~ E*G*Z,search_type="bidirectional-backward", search=0, search_criterion="AIC")
#'	}
#' @export
"stepwise_search_IM"

#' @title Bootstrap variable selection (for IMLEGIT)
#' @description Creates bootstrap samples, run stepwise search on all of them and then report the percentage of times that each variable was selected. This is very computationally demanding. With small sample sizes, variable selection can be unstable and bootstrap can be used to give us an idea of the degree of certitude that a variable should be included or not.
#' @param data data.frame of the dataset to be used.
#' @param formula Model formula. Use \emph{E} for the environmental score and \emph{G} for the genetic score. Do not manually code interactions, write them in the formula instead (ex: G*E*z or G:E:z).
#' @param boot_iter number of bootstrap samples (Default = 1000).
#' @param boot_size Optional size of the bootstrapped samples (Default = number of observations).
#' @param boot_group Optional vector which represents the group associated with each observation. Sampling will be done by group instead of by observations (very important if you have longitudinal data). The sample sizes of the bootstrap samples might differ by up to "\code{boot_size} - maximum group size" observations.
#' @param latent_var_original list of data.frame. The elements of the list are the datasets used to construct each latent variable. For interpretability and proper convergence, not using the same variable in more than one latent variable is highly recommended. It is recommended to set names to the list elements to prevent confusion because otherwise the latent variables will be named L1, L2, ...
#' @param latent_var_extra list of data.frame (with the same structure as latent_var_original) containing the additional elements to try including inside the latent variables. Set to NULL if using a backward search.
#' @param search_type If \code{search_type="forward"}, uses a forward search. If \code{search_type="backward"}, uses backward search. If \code{search_type="bidirectional-forward"}, uses bidirectional search (that starts as a forward search). If \code{search_type="bidirectional-backward"}, uses bidirectional search (that starts as a backward search).
#' @param search If \code{search=0}, uses a stepwise search for all latent variables. Otherwise, if search = i, uses a stepwise search on the i-th latent variable (Default = 0).
#' @param search_criterion Criterion used to determine which variable is the best to add or worst to drop. If \code{search_criterion="AIC"}, uses the AIC, if \code{search_criterion="BIC"}, uses the BIC, if \code{search_criterion="cv"}, uses the cross-validation error, if \cr \code{search_criterion="cv_AUC"}, uses the cross-validated AUC, if \code{search_criterion="cv_Huber"}, uses the Huber cross-validation error, if \code{search_criterion="cv_L1"}, uses the L1-norm cross-validation error (Default = "AIC"). The Huber and L1-norm cross-validation errors are alternatives to the usual cross-validation L2-norm error (which the \eqn{R^2} is based on) that are more resistant to outliers, the lower the values the better.
#' @param forward_exclude_p_bigger If p-value > \code{forward_exclude_p_bigger}, we do not consider the variable for inclusion in the forward steps (Default = .20).
#' @param backward_exclude_p_smaller If p-value < \code{backward_exclude_p_smaller}, we do not consider the variable for removal in the backward steps (Default = .01).
#' @param exclude_worse_AIC If AIC with variable > AIC without variable, we ignore the variable (Default = TRUE).
#' @param max_steps Maximum number of steps taken (Default = 50).
#' @param cv_iter Number of cross-validation iterations (Default = 5).
#' @param cv_folds Number of cross-validation folds (Default = 10). Using \code{cv_folds=NROW(data)} will lead to leave-one-out cross-validation.
#' @param folds Optional list of vectors containing the fold number for each observation. Bypass cv_iter and cv_folds. Setting your own folds could be important for certain data types like time series or longitudinal data.
#' @param classification Set to TRUE if you are doing classification (binary outcome).
#' @param start_latent_var Optional list of starting points for each latent variable (The list must have the same length as the number of latent variables and each element of the list must have the same length as the number of variables of the corresponding latent variable).
#' @param eps Threshold for convergence (.01 for quick batch simulations, .0001 for accurate results).
#' @param maxiter Maximum number of iterations.
#' @param family Outcome distribution and link function (Default = gaussian).
#' @param seed Optional seed for bootstrap.
#' @param Huber_p Parameter controlling the Huber cross-validation error (Default =1).
#' @param progress If TRUE, shows the progress done (Default=TRUE).
#' @param n_cluster Number of parallel clusters, I recommend using the number of CPU cores - 1 (Default = 1).
#' @return Returns a list of vectors containing the percentage of times that each variable was selected within each latent variable.
#' @examples
#'	\dontrun{
#'	## Example
#'	train = example_3way_3latent(250, 2, seed=777)
#'	# Bootstrap with Bidirectional-backward search for everything based on AIC
#'	# Normally you should use a lot more than 10 iterations and extra CPUs (n_cluster)
#'	boot = bootstrap_var_select(train$data, latent_var_extra=NULL, 
#'	latent_var_original=train$latent_var,
#'	formula=y ~ E*G*Z,search_type="bidirectional-backward", search=0, 
#'	search_criterion="AIC", boot_iter=10, n_cluster=1)
#'	# Assuming it's longitudinal with 5 timepoints, even though it's not
#'	id = factor(rep(1:50,each=5))
#'	boot_longitudinal = bootstrap_var_select(train$data, latent_var_extra=NULL, 
#'	latent_var_original=train$latent_var,
#'	formula=y ~ E*G*Z,search_type="bidirectional-backward", search=0, 
#'	search_criterion="AIC", boot_iter=10, n_cluster=1, boot_group=id)
#'	}
#' @import foreach snow doSNOW utils iterators
#' @references Peter C Austin and Jack V Tu. \emph{Bootstrap Methods for Developing Predictive Models} (2012). dx.doi.org/10.1198/0003130043277.
#' @references Mark Reiser, Lanlan Yao, Xiao Wang, Jeanne Wilcox and Shelley Gray. \emph{A Comparison of Bootstrap Confidence Intervals for Multi-level Longitudinal Data Using Monte-Carlo Simulation} (2017). 10.1007/978-981-10-3307-0_17.
#' @export
"bootstrap_var_select"

# Simulates a dataset from a 2-way interaction model between two latent scores g and e.
#   N     : number of observations to simulate.
#   sigma : standard deviation of the gaussian noise added to the outcome (only used when logit=FALSE).
#   logit : if TRUE, the linear predictor is passed through the inverse logit and a binary outcome is drawn.
#   seed  : optional RNG seed; when NULL the current RNG state is used as is.
# Returns a list: data (y and y_true), G (genetic variants), E (environments) and the true
# coefficients used to generate the data (coef_G, coef_E, coef_main).
example_2way = function(N, sigma=1, logit=FALSE, seed=NULL){
	# Only touch the RNG when a seed is given: set.seed(NULL) would re-initialize the generator
	# and silently discard a seed previously set by the caller.
	if (!is.null(seed)) set.seed(seed)

	# True coefficients; they are used both to generate the data and in the returned list so
	# that the reported values can never disagree with the generating model.
	coef_G = c(.2,.15,-.3,.1,.05,.2)
	coef_E = c(-.45,.35,.2)
	# Order: intercept, g, e, g*e
	coef_main = c(-1,2,3,4)

	# Genetic variants (binary) and their two interaction columns
	g1 = rbinom(N,1,.30)
	g2 = rbinom(N,1,.30)
	g3 = rbinom(N,1,.30)
	g4 = rbinom(N,1,.30)
	g1_g3 = g1*g3
	g2_g3 = g2*g3
	# Environments (gaussian)
	e1 = rnorm(N,0,1.5)
	e2 = rnorm(N,0,1.5)
	e3 = rnorm(N,0,1.5)

	# Latent scores
	g = coef_G[1]*g1 + coef_G[2]*g2 + coef_G[3]*g3 + coef_G[4]*g4 + coef_G[5]*g1_g3 + coef_G[6]*g2_g3
	e = coef_E[1]*e1 + coef_E[2]*e2 + coef_E[3]*e3

	# Noise-free outcome (linear predictor)
	y_true = coef_main[1] + coef_main[2]*g + coef_main[3]*e + coef_main[4]*g*e
	if (logit){
		# y_true becomes the success probability of the binary outcome
		y_true = 1/(1+exp(-(y_true)))
		y = rbinom(N,1,y_true)
	}
	else{
		eps = rnorm(N,0,sigma)
		y = y_true + eps
	}
	return(list(data=data.frame(y,y_true),G=data.frame(g1,g2,g3,g4,g1_g3,g2_g3),E=data.frame(e1,e2,e3),coef_G=coef_G,coef_E=coef_E, coef_main=coef_main))
}

# Simulates a dataset from a 3-way interaction model between two latent scores (g, e) and an
# observed covariate z.
#   N     : number of observations to simulate.
#   sigma : standard deviation of the gaussian noise added to the outcome (only used when logit=FALSE).
#   logit : if TRUE, the linear predictor is passed through the inverse logit and a binary outcome is drawn.
#   seed  : optional RNG seed; when NULL the current RNG state is used as is.
# Returns a list: data (y, y_true and z), G (genetic variants), E (environments) and the true
# coefficients used to generate the data (coef_G, coef_E, coef_main).
example_3way = function(N, sigma=2.5, logit=FALSE, seed=NULL){
	# Only touch the RNG when a seed is given: set.seed(NULL) would re-initialize the generator
	# and silently discard a seed previously set by the caller.
	if (!is.null(seed)) set.seed(seed)

	# True coefficients; they are used both to generate the data and in the returned list so
	# that the reported values can never disagree with the generating model.
	coef_G = c(.2,.15,-.3,.1,.05,.2)
	coef_E = c(-.45,.35,.2)
	# Order: intercept, g, e, z, g*e, e*z, g*z, g*e*z
	coef_main = c(-2,2,3,1,5,-1.5,2,2)

	# Genetic variants (binary) and their two interaction columns
	g1 = rbinom(N,1,.30)
	g2 = rbinom(N,1,.30)
	g3 = rbinom(N,1,.30)
	g4 = rbinom(N,1,.30)
	g1_g3 = g1*g3
	g2_g3 = g2*g3
	# Environments (gaussian)
	e1 = rnorm(N,0,1.5)
	e2 = rnorm(N,0,1.5)
	e3 = rnorm(N,0,1.5)

	# Latent scores
	g = coef_G[1]*g1 + coef_G[2]*g2 + coef_G[3]*g3 + coef_G[4]*g4 + coef_G[5]*g1_g3 + coef_G[6]*g2_g3
	e = coef_E[1]*e1 + coef_E[2]*e2 + coef_E[3]*e3
	# Observed covariate
	z = rnorm(N,3,1)

	# Noise-free outcome (linear predictor)
	y_true = coef_main[1] + coef_main[2]*g + coef_main[3]*e + coef_main[4]*z + coef_main[5]*g*e + coef_main[6]*z*e + coef_main[7]*z*g + coef_main[8]*z*g*e
	if (logit){
		# y_true becomes the success probability of the binary outcome
		y_true = 1/(1+exp(-(y_true)))
		y = rbinom(N,1,y_true)
	}
	else{
		eps = rnorm(N,0,sigma)
		y = y_true + eps
	}
	return(list(data=data.frame(y,y_true,z),G=data.frame(g1,g2,g3,g4,g1_g3,g2_g3),E=data.frame(e1,e2,e3),coef_G=coef_G,coef_E=coef_E, coef_main=coef_main))
}

# Simulates a dataset from a 3-way interaction model between three latent scores (g, e, z).
#   N     : number of observations to simulate.
#   sigma : standard deviation of the gaussian noise added to the outcome (only used when logit=FALSE).
#   logit : if TRUE, the linear predictor is passed through the inverse logit and a binary outcome is drawn.
#   seed  : optional RNG seed; when NULL the current RNG state is used as is.
# Returns a list: data (y and y_true), latent_var (list of the data.frames G, E and Z) and the
# true coefficients used to generate the data (coef_G, coef_E, coef_Z, coef_main).
example_3way_3latent = function(N, sigma=1, logit=FALSE, seed=NULL){
	# Only touch the RNG when a seed is given: set.seed(NULL) would re-initialize the generator
	# and silently discard a seed previously set by the caller.
	if (!is.null(seed)) set.seed(seed)

	# True coefficients; they are used both to generate the data and in the returned list so
	# that the reported values can never disagree with the generating model.
	coef_G = c(.2,.15,-.3,.1,.05,.2)
	coef_E = c(-.45,.35,.2)
	coef_Z = c(.15,.75,.10)
	# Order: intercept, g, e, z, g*e, e*z, g*z, g*e*z
	coef_main = c(-2,2,3,1,5,-1.5,2,2)

	# Elements of the first latent variable (binary) and their two interaction columns
	g1 = rbinom(N,1,.30)
	g2 = rbinom(N,1,.30)
	g3 = rbinom(N,1,.30)
	g4 = rbinom(N,1,.30)
	g1_g3 = g1*g3
	g2_g3 = g2*g3
	# Elements of the second latent variable (gaussian)
	e1 = rnorm(N,0,1.5)
	e2 = rnorm(N,0,1.5)
	e3 = rnorm(N,0,1.5)
	# Elements of the third latent variable (gaussian)
	z1 = rnorm(N,3,1)
	z2 = rnorm(N,3,1)
	z3 = rnorm(N,3,1)

	# Latent scores
	g = coef_G[1]*g1 + coef_G[2]*g2 + coef_G[3]*g3 + coef_G[4]*g4 + coef_G[5]*g1_g3 + coef_G[6]*g2_g3
	e = coef_E[1]*e1 + coef_E[2]*e2 + coef_E[3]*e3
	z = coef_Z[1]*z1 + coef_Z[2]*z2 + coef_Z[3]*z3

	# Noise-free outcome (linear predictor)
	y_true = coef_main[1] + coef_main[2]*g + coef_main[3]*e + coef_main[4]*z + coef_main[5]*g*e + coef_main[6]*z*e + coef_main[7]*z*g + coef_main[8]*z*g*e
	if (logit){
		# y_true becomes the success probability of the binary outcome
		y_true = 1/(1+exp(-(y_true)))
		y = rbinom(N,1,y_true)
	}
	else{
		eps = rnorm(N,0,sigma)
		y = y_true + eps
	}
	return(list(data=data.frame(y,y_true),latent_var=list(G=data.frame(g1,g2,g3,g4,g1_g3,g2_g3),E=data.frame(e1,e2,e3),Z=data.frame(z1,z2,z3)),coef_G=coef_G,coef_E=coef_E,coef_Z=coef_Z, coef_main=coef_main))
}

# Builds cross-validation folds in which all observations sharing the same id fall in the same fold.
#   cv_iter     : number of fold assignments to generate.
#   cv_folds    : number of folds; cannot exceed the number of unique ids.
#   id          : vector with the id of each observation.
#   formula     : optional model formula; together with data it is used to find the rows with missing values.
#   data        : optional data.frame holding the variables of the formula.
#   data_needed : optional extra data (data.frame, matrix or list of datasets) whose rows with
#                 missing values are also excluded.
# Returns a list of length cv_iter; each element is a vector giving the fold number of every
# observation that has no missing value.
longitudinal_folds = function(cv_iter=1, cv_folds=10, id, formula=NULL, data=NULL, data_needed=NULL){
	if (cv_folds > length(unique(id))) stop("cv_folds must be smaller than the number of unique id")
	# in IMLEGIT, data_needed would be latent_var which is a list and we need to unlist it if that's the case.
	# is.list()/is.data.frame() are used rather than comparing class(), because class() can have
	# length > 1 (e.g. a matrix) and would then be an invalid `if` condition.
	if (!is.null(data_needed) && is.list(data_needed) && !is.data.frame(data_needed)) data_needed = do.call(cbind.data.frame, data_needed)
	if (!is.null(data) && !is.null(formula)){
		# Extracting only the variables available from the formula
		formula = stats::as.formula(formula)
		formula_vars = get.vars(formula)
		formula_outcome = formula_vars[1]
		vars_names = formula_vars[formula_vars %in% names(data)]
		if (!is.null(data_needed)){
			# colnames() works for both a data.frame and a matrix (names() is NULL for a matrix)
			vars_names = c(vars_names,colnames(data_needed))
			data = data.frame(data,data_needed)
		}
		formula_n = stats::as.formula(paste0(formula_outcome, " ~ ", paste0(vars_names,collapse=" + ")))

		# Keep the missing values (na.pass) so that rows still line up with id
		data = stats::model.frame(formula_n, data, na.action=stats::na.pass)
		id = id[stats::complete.cases(data)]
	}
	else{
		if (!is.null(data_needed)) id = id[stats::complete.cases(data_needed)]
	}
	folds = vector("list", cv_iter)
	for (i in seq_len(cv_iter)){
		# Shuffle the unique ids by position: sample(x) on a single number would sample from 1:x instead
		unique_id = sort(unique(id))
		s = unique_id[sample.int(length(unique_id))]
		# Split the shuffled ids into cv_folds groups of (nearly) equal size
		id_new = cut(seq_along(s),breaks=cv_folds,labels=FALSE)
		folds[[i]] = rep(NA, length(id))
		for (j in seq_len(cv_folds)){
			folds[[i]][id %in% s[id_new==j]] = j
		}
	}
	return(folds)
}

# Fits a model whose formula contains two latent scores, G and E, each defined as a weighted sum
# of the columns of genes and env respectively. The fit alternates between three glm fits:
#   step a : the main model (formula) given the current G and E,
#   step b : the weights of the genes given the main model parameters and E,
#   step c : the weights of the environments given the main model parameters and G.
# After each update the weights are rescaled so that their absolute values sum to 1. Iterations
# stop when the Euclidean distance between successive weights is below eps for both scores, or
# after maxiter iterations.
#   data        : data.frame (or matrix) with the outcome and the other variables of the formula.
#   genes, env  : datasets with the elements of the genetic / environmental score.
#   formula     : model formula, using G and E for the two scores.
#   start_genes, start_env : optional starting weights (NULL or all zeros gives equal weights).
#   eps, maxiter: convergence threshold and maximum number of iterations.
#   family      : family passed to glm.
#   print       : if TRUE, prints notes and the number of iterations needed.
# Returns an object of class "LEGIT": list(fit_main, fit_genes, fit_env, true_model_parameters).
LEGIT = function(data, genes, env, formula, start_genes=NULL, start_env=NULL, eps=.001, maxiter=100, family=gaussian, print=TRUE)
{
	if (maxiter <= 0) warning("maxiter must be > 0")
	if(!is.null(start_genes)){
		if (NCOL(genes)!=length(start_genes)) stop("start_genes must either be NULL or have the same length as the number of genes")
	}
	if(!is.null(start_env)){
		if (NCOL(env)!=length(start_env)) stop("start_env must either be NULL or have the same length as the number of environments")
	}
	# is.data.frame()/is.matrix() rather than comparing class(): class() can have length > 1
	# (e.g. c("matrix","array")), which is not a valid operand for `&&`
	if (!is.data.frame(data) && !is.matrix(data)) stop("data must be a data.frame")

	# getting right formats
	# Retaining only the needed variables from the dataset (need to set G and E variables for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	data$G=0
	data$E=0
	data = stats::model.frame(formula, data=data, na.action=na.pass)
	genes = as.matrix(genes)
	if (is.null(colnames(genes))){
		if (print) cat("You have not specified column names for genes, they will be named gene1, gene2, ...\n")
		colnames(genes) = paste0("gene",1:NCOL(genes))
	}
	env = as.matrix(env)
	if (is.null(colnames(env))){
		if (print) cat("You have not specified column names for env, they will be named env1, env2, ...\n")
		colnames(env) = paste0("env",1:NCOL(env))
	}
	formula = stats::as.formula(formula)

	# Error message about factors
	if (sum(apply(data,2,is.numeric)) != NCOL(data) || sum(apply(genes,2,is.numeric)) != NCOL(genes) || sum(apply(env,2,is.numeric)) != NCOL(env)) stop("All variables used must be numeric, factors are not allowed. Please dummy code all categorical variables inside your datasets (data, gene, env)")

	# remove missing data
	comp = stats::complete.cases(data,genes,env)
	data = data[comp,, drop=FALSE]
	genes = genes[comp,, drop=FALSE]
	env = env[comp,, drop=FALSE]
	if (dim(data)[1] <= 0) stop("no valid observation without missing values")

	#Adding empty variables in main dataset for genes and env
	data[,colnames(genes)]=0
	data[,colnames(env)]=0
	data$R0_b=0
	data$R0_c=0

	# Setting up initial weighted scores (equal weights unless non-zero starting points are given)
	if (is.null(start_genes)) weights_genes = rep(1/dim(genes)[2],dim(genes)[2])
	else if (sum(abs(start_genes))==0) weights_genes = rep(1/dim(genes)[2],dim(genes)[2])
	else weights_genes = start_genes/sum(abs(start_genes))

	if (is.null(start_env)) weights_env = rep(1/dim(env)[2],dim(env)[2])
	else if (sum(abs(start_env))==0) weights_env = rep(1/dim(env)[2],dim(env)[2])
	else weights_env = start_env/sum(abs(start_env))

	data$G = genes%*%weights_genes
	data$E = env%*%weights_env

	# Deconstructing formula into parts (No E or G / only E / only G / both G and E)
	formula_full = stats::terms(formula,simplify=TRUE)
	formula_outcome = get.vars(formula)[1]
	formula_elem_ = attributes(formula_full)$term.labels
	# Adding white spaces before and after to recognize a "E" as opposed to another string like "Elephant"
	formula_elem = paste("", formula_elem_,"")
	index_with_G = grepl(" G ",formula_elem, fixed=TRUE) | grepl(" G:",formula_elem, fixed=TRUE) | grepl(":G:",formula_elem, fixed=TRUE) | grepl(":G ",formula_elem, fixed=TRUE)
	index_with_E = grepl(" E ",formula_elem, fixed=TRUE) | grepl(" E:",formula_elem, fixed=TRUE) | grepl(":E:",formula_elem, fixed=TRUE) | grepl(":E ",formula_elem, fixed=TRUE)
	index_with_GE = index_with_G & index_with_E
	index_with_G = index_with_G & !index_with_GE
	index_with_E = index_with_E & !index_with_GE
	data_expanded = stats::model.matrix(formula, data=data)
	if (colnames(data_expanded)[1] == "(Intercept)"){
		formula_elem = c("1",formula_elem)
		index_with_G = c(FALSE, index_with_G)
		index_with_E = c(FALSE, index_with_E)
		index_with_GE = c(FALSE, index_with_GE)
	}
	index_without_GE = !(index_with_G | index_with_E | index_with_GE)

	# Masks over the coefficients of the main model used in the reparametrizations
	terms_without_G = index_without_GE | index_with_E
	terms_with_G = index_with_G | index_with_GE
	terms_without_E = index_without_GE | index_with_G
	terms_with_E = index_with_E | index_with_GE

	## Formulas for reparametrization in step b (estimating G)
	formula_withoutG = paste0(formula_outcome, " ~ ", paste0(formula_elem[terms_without_G],collapse=" + "))
	if (formula_elem[1] != "1") formula_withoutG = paste0(formula_withoutG, " - 1")
	formula_withoutG = stats::as.formula(formula_withoutG)

	formula_elem_withG = formula_elem[terms_with_G]
	# The reduced formula needs an intercept only when G enters as a main effect. This is checked on
	# the exact term label before substitution: searching the substituted strings for "1" would also
	# match variable names containing a 1, and a vector is not a valid operand for `&&`.
	has_main_G = any(formula_elem_withG == " G ")
	# Remove G elements from formula because we want (b1 + b2*E + ...)*G rather than b1*G + b2*E*G + ...
	formula_elem_withG = gsub(" G ","1",formula_elem_withG, fixed=TRUE)
	formula_elem_withG = gsub(" G:","",formula_elem_withG, fixed=TRUE)
	formula_elem_withG = gsub(":G:",":",formula_elem_withG, fixed=TRUE)
	formula_elem_withG = gsub(":G ","",formula_elem_withG, fixed=TRUE)
	formula_withG = paste0(formula_outcome, " ~ ", paste0(formula_elem_withG,collapse=" + "))
	if (!has_main_G) formula_withG = paste0(formula_withG, " - 1")
	formula_withG = stats::as.formula(formula_withG)

	## Formulas for reparametrization in step c (estimating E)
	formula_withoutE = paste0(formula_outcome, " ~ ", paste0(formula_elem[terms_without_E],collapse=" + "))
	if (formula_elem[1] != "1") formula_withoutE = paste0(formula_withoutE, " - 1")
	formula_withoutE = stats::as.formula(formula_withoutE)

	formula_elem_withE = formula_elem[terms_with_E]
	# Same intercept logic as for G
	has_main_E = any(formula_elem_withE == " E ")
	# Remove E elements from formula because we want (b1 + b2*G + ...)*E rather than b1*E + b2*G*E + ...
	formula_elem_withE = gsub(" E ","1",formula_elem_withE, fixed=TRUE)
	formula_elem_withE = gsub(" E:","",formula_elem_withE, fixed=TRUE)
	formula_elem_withE = gsub(":E:",":",formula_elem_withE, fixed=TRUE)
	formula_elem_withE = gsub(":E ","",formula_elem_withE, fixed=TRUE)
	formula_withE = paste0(formula_outcome, " ~ ", paste0(formula_elem_withE,collapse=" + "))
	if (!has_main_E) formula_withE = paste0(formula_withE, " - 1")
	formula_withE = stats::as.formula(formula_withE)

	# Making formula for step b (estimating G): outcome ~ gene columns + offset, without intercept
	formula_b = paste0(formula_outcome, " ~ ", paste0(colnames(genes),collapse=" + "), " + offset(R0_b) - 1")
	formula_b = stats::as.formula(formula_b)

	# Making formula for step c (estimating E): outcome ~ env columns + offset, without intercept
	formula_c = paste0(formula_outcome, " ~ ", paste0(colnames(env),collapse=" + "), " + offset(R0_c) - 1")
	formula_c = stats::as.formula(formula_c)

	for (i in 1:maxiter){
		## Step a : fit main model
		fit_a = stats::glm(formula, data=data, family=family, y=FALSE, model=FALSE)

		if (NCOL(genes)>1){
			# Reparametrizing variables for step b (estimating G)
			# R0_b : part of the linear predictor that does not involve G (used as offset)
			data_expanded_withoutG = stats::model.matrix(formula_withoutG, data=data)
			data$R0_b = data_expanded_withoutG%*%stats::coef(fit_a)[terms_without_G]
			# R1_b : multiplier of G in the linear predictor; each gene column is scaled by it
			data_expanded_withG = stats::model.matrix(formula_withG, data=data)
			R1_b = data_expanded_withG%*%stats::coef(fit_a)[terms_with_G]
			R1_b_genes = genes*as.vector(R1_b)
			data[,colnames(genes)]=R1_b_genes

			## Step b : fit model for G
			fit_b = stats::glm(formula_b, data=data, family=family, y=FALSE, model=FALSE)
			weights_genes_ = stats::coef(fit_b)

			# Updating G estimates and checking convergence
			weights_genes_old = weights_genes
			weights_genes = weights_genes_/sum(abs(weights_genes_))
			data$G = genes%*%weights_genes
			conv_G = sqrt(sum((weights_genes_old-weights_genes)^2)) < eps
		}
		else conv_G = TRUE

		if (NCOL(env)>1){
			# Reparametrizing variables for step c (estimating E)
			# R0_c : part of the linear predictor that does not involve E (used as offset)
			data_expanded_withoutE = stats::model.matrix(formula_withoutE, data=data)
			data$R0_c = data_expanded_withoutE%*%stats::coef(fit_a)[terms_without_E]
			# R1_c : multiplier of E in the linear predictor; each env column is scaled by it
			data_expanded_withE = stats::model.matrix(formula_withE, data=data)
			R1_c = data_expanded_withE%*%stats::coef(fit_a)[terms_with_E]
			R1_c_env = env*as.vector(R1_c)
			data[,colnames(env)]=R1_c_env

			## Step c : fit model for E
			fit_c = stats::glm(formula_c, data=data, family=family, y=FALSE, model=FALSE)
			weights_env_ = stats::coef(fit_c)

			# Updating E estimates and checking convergence
			weights_env_old = weights_env
			weights_env = weights_env_/sum(abs(weights_env_))
			data$E = env%*%weights_env
			conv_E = sqrt(sum((weights_env_old-weights_env)^2)) < eps
		}
		else conv_E = TRUE

		if (conv_G && conv_E) break
	}

	# Rerunning last time and scaling to return as results
	fit_a = stats::glm(formula, data=data, family=family, y=FALSE, model=FALSE)

	# Reparametrizing variables for step b (estimating G)
	data_expanded_withoutG = stats::model.matrix(formula_withoutG, data=data)
	data$R0_b = data_expanded_withoutG%*%stats::coef(fit_a)[terms_without_G]
	data_expanded_withG = stats::model.matrix(formula_withG, data=data)
	R1_b = data_expanded_withG%*%stats::coef(fit_a)[terms_with_G]
	R1_b_genes = genes*as.vector(R1_b)
	data[,colnames(genes)]=R1_b_genes

	# Fit once, rescale the predictors by the sum of absolute coefficients, then refit on the rescaled predictors
	fit_b = stats::glm(formula_b, data=data, family=family, y=FALSE, model=FALSE)
	data[,colnames(genes)] = data[,colnames(genes)]*sum(abs(stats::coef(fit_b)))
	fit_b = stats::glm(formula_b, data=data, family=family, y=FALSE, model=FALSE)

	# Reparametrizing variables for step c (estimating E)
	data_expanded_withoutE = stats::model.matrix(formula_withoutE, data=data)
	data$R0_c = data_expanded_withoutE%*%stats::coef(fit_a)[terms_without_E]
	data_expanded_withE = stats::model.matrix(formula_withE, data=data)
	R1_c = data_expanded_withE%*%stats::coef(fit_a)[terms_with_E]
	R1_c_env = env*as.vector(R1_c)
	data[,colnames(env)]=R1_c_env

	fit_c = stats::glm(formula_c, data=data, family=family, y=FALSE, model=FALSE)
	data[,colnames(env)] = data[,colnames(env)]*sum(abs(stats::coef(fit_c)))
	fit_c = stats::glm(formula_c, data=data, family=family, y=FALSE, model=FALSE)

	# Sanity check: warn unless BOTH the genetic and the environmental fits have a deviance within 1% of the main fit
	dev_close_G = abs((fit_a$deviance-fit_b$deviance)/fit_a$deviance) < .01
	dev_close_E = abs((fit_a$deviance-fit_c$deviance)/fit_c$deviance) < .01
	if (!(dev_close_G && dev_close_E)) warning("Deviance differs by more than 1% between model parts. Make sure that everything was set up properly and try increasing the number of iterations (maxiter).")

	#Change some arguments so that we get the right AIC, BIC and dispersion for the model
	# Each score contributes (rank - 1) parameters; presumably one is lost to the normalization of the weights - TODO confirm
	true_aic = fit_a$aic + 2*(fit_b$rank - 1) + 2*(fit_c$rank - 1)
	true_rank = fit_a$rank + (fit_b$rank - 1) + (fit_c$rank - 1)
	true_bic = true_aic - 2*true_rank + log(fit_a$df.null+1)*true_rank
	true_df.residual = (fit_a$df.null+1) - true_rank
	true_null.deviance = fit_a$null.deviance

	# print convergences stuff;
	if (conv_G && conv_E){
		if (print) cat(paste0("Converged in ",i, " iterations\n"))
	}
	else{
		warning(paste0("Did not reach convergence in maxiter iterations. Try increasing maxiter or make eps smaller."))
	}

	result = list(fit_main = fit_a, fit_genes = fit_b, fit_env = fit_c, true_model_parameters=list(AIC = true_aic, BIC = true_bic, rank = true_rank, df.residual = true_df.residual, null.deviance=true_null.deviance))
	class(result) <- "LEGIT"
	return(result)
}

IMLEGIT = function(data, latent_var, formula, start_latent_var=NULL, eps=.001, maxiter=100, family=gaussian, print=TRUE)
{
	# Setting up latent_var and checks
	if (class(latent_var)!="list") stop("latent_var must be a list of datasets")
	k = length(latent_var)
	if (k==0) stop("latent_var cannot be an empty list")
	if (is.null(names(latent_var))){
		if (print) cat("You have not specified names for the latent variables, assigning names to latent_var is highly recommended to prevent confusion. For now, they will be named L1, L2, ...\n")
		names(latent_var) = paste0("L",1:k)
	}
	for (i in 1:k){
		latent_var[[i]] = as.matrix(data.frame(latent_var[[i]],fix.empty.names=FALSE))
		if (sum(colnames(latent_var[[i]])=="") > 0){
			if (print) cat(paste0("You have not specified column names for certain elements in ",names(latent_var)[i], ", elements of this latent variable will be named ",names(latent_var)[i],1,", ",names(latent_var)[i],2," ...\n"))
			colnames(latent_var[[i]]) = paste0(names(latent_var)[i],1:NCOL(latent_var[[i]]))
		}
	}

	# More checks
	if (maxiter <= 0) warning("maxiter must be > 0")
	if (k > 1) for (i in 1:(k-1)) if (NROW(latent_var[[i]]) != NROW(latent_var[[i+1]])) stop("Some datasets in latent_var don't have the same number of observations")
	if(!is.null(start_latent_var)){
		if (class(start_latent_var)!="list") stop("start_latent_var must be a lit of vectors (or NULL)")
		if (k!=length(start_latent_var)) stop("start_latent_var must have the same size as latent_var")
		for (i in 1:k){
			if (!is.null(latent_var[[i]])){
				if (NCOL(latent_var[[i]])!=length(start_latent_var[[i]])) stop("All elements of start_latent_var must either be NULL or have the same length as the number of the elements in its associated latent variable")
			}
		}
	}
	if (class(data) != "data.frame" && class(data) != "matrix") stop("data must be a data.frame")

	# getting right formats
	# Retaining only the needed variables from the dataset (need to set elements in latent_var for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	for (i in 1:k) data[,names(latent_var)[i]] = 0
	data = stats::model.frame(formula, data=data, na.action=na.pass)
	formula = stats::as.formula(formula)

	# Error message about factors
	if (sum(apply(data,2,is.numeric)) != NCOL(data)) stop("All variables used must be numeric, factors are not allowed. Please dummy code all categorical variables inside your datasets (data, latent_var[[1]], latent_var[[2]], ...)")
	for (i in 1:k) if (sum(apply(latent_var[[i]],2,is.numeric)) != NCOL(latent_var[[i]])) stop("All variables used must be numeric, factors are not allowed. Please dummy code all categorical variables inside your datasets (data, latent_var[[1]], latent_var[[2]], ...)")

	# remove missing data
	comp = stats::complete.cases(data,latent_var[[1]])
	if (k > 1) for (i in 2:k) comp = comp & stats::complete.cases(latent_var[[i]])
	data = data[comp,, drop=FALSE]
	for (i in 1:k) latent_var[[i]] = latent_var[[i]][comp,, drop=FALSE]
	if (dim(data)[1] <= 0) stop("no valid observation without missing values")

	#Adding empty variables in main dataset for latent_var
	for (i in 1:k){
		data[,colnames(latent_var[[i]])]=0
		data[,paste0("R0_",i)]=0
	}

	# Setting up initial weighted latent_var
	weights_latent_var_old = vector("list", k)
	weights_latent_var = vector("list", k)
	if (is.null(start_latent_var)){
		for (i in 1:k) weights_latent_var[[i]] = rep(1/dim(latent_var[[i]])[2],dim(latent_var[[i]])[2])
	}
	else{
		for (i in 1:k){
			if (sum(abs(start_latent_var[[i]]))==0) weights_latent_var[[i]] = rep(1/dim(latent_var[[i]])[2],dim(latent_var[[i]])[2])
			else weights_latent_var[[i]] = start_latent_var[[i]]/sum(abs(start_latent_var[[i]]))
		}		
	}
	for (i in 1:k) data[,names(latent_var)[i]] = latent_var[[i]]%*%weights_latent_var[[i]]

	# Lists needed for later
	index_with_latent_var = vector("list", k)
	formula_withoutlatent_var  = vector("list", k)
	formula_withlatent_var  = vector("list", k)
	formula_step  = vector("list", k)
	fit_ = vector("list", k)

	# Deconstructing formula into parts (With latent_var and without latent_var)
	formula_full = stats::terms(formula,simplify=TRUE)
	formula_outcome = get.vars(formula)[1]
	formula_elem_ = attributes(formula_full)$term.labels
	# Adding white spaces before and after to recognize a "E" as opposed to another string like "Elephant"
	formula_elem = paste("", formula_elem_,"")
	for (i in 1:k) index_with_latent_var[[i]] = grepl(paste0(" ",names(latent_var)[i]," "),formula_elem, fixed=TRUE) | grepl(paste0(" ",names(latent_var)[i],":"),formula_elem, fixed=TRUE) | grepl(paste0(":",names(latent_var)[i],":"),formula_elem, fixed=TRUE) | grepl(paste0(":",names(latent_var)[i]," "),formula_elem, fixed=TRUE)
	data_expanded = stats::model.matrix(formula, data=data)
	if (colnames(data_expanded)[1] == "(Intercept)"){
		formula_elem = c("1",formula_elem)
		for (i in 1:k) index_with_latent_var[[i]] = c(FALSE,index_with_latent_var[[i]])
	}

	for (i in 1:k){
		## Formulas for reparametrizations in each steps
		formula_elem_withoutlatent_var = formula_elem[!index_with_latent_var[[i]]]
		formula_elem_withoutlatent_var[-length(formula_elem_withoutlatent_var)] = paste0(formula_elem_withoutlatent_var[-length(formula_elem_withoutlatent_var)], " + ")
		formula_withoutlatent_var[[i]] = paste0(formula_outcome, " ~ ", paste0(formula_elem_withoutlatent_var,collapse=""))
		if (formula_elem[1] != "1") formula_withoutlatent_var[[i]] = paste0(formula_withoutlatent_var[[i]], " - 1")
		formula_withoutlatent_var[[i]] = stats::as.formula(formula_withoutlatent_var[[i]])

		formula_elem_withlatent_var = formula_elem[index_with_latent_var[[i]]]
		# Remove G elements from formula because we want (b1 + b2*E + ...)*G rather than b1*G + b2*E*G + ...
		formula_elem_withlatent_var = gsub(paste0(" ",names(latent_var)[i]," "),"1",formula_elem_withlatent_var, fixed=TRUE)
		formula_elem_withlatent_var = gsub(paste0(" ",names(latent_var)[i],":"),"",formula_elem_withlatent_var, fixed=TRUE)
		formula_elem_withlatent_var = gsub(paste0(":",names(latent_var)[i],":"),":",formula_elem_withlatent_var, fixed=TRUE)
		formula_elem_withlatent_var = gsub(paste0(":",names(latent_var)[i]," "),"",formula_elem_withlatent_var, fixed=TRUE)
		formula_elem_withlatent_var[-length(formula_elem_withlatent_var)] = paste0(formula_elem_withlatent_var[-length(formula_elem_withlatent_var)], " + ")
		formula_withlatent_var[[i]] = paste0(formula_outcome, " ~ ", paste0(formula_elem_withlatent_var,collapse=""))
		if (!(grepl("1",formula_elem_withlatent_var, fixed=TRUE) && TRUE)) formula_withlatent_var[[i]] = paste0(formula_withlatent_var[[i]], " - 1")
		formula_withlatent_var[[i]] = stats::as.formula(formula_withlatent_var[[i]])

		# Making formula for step i
		latent_var_names = colnames(latent_var[[i]])
		latent_var_names[-length(latent_var[[i]])] = paste0(colnames(latent_var[[i]])[-length(latent_var[[i]])], " + ")
		formula_step[[i]] = paste0(formula_outcome, " ~ ", paste0(latent_var_names,collapse=""))
		formula_step[[i]] = paste0(formula_step[[i]], " offset(R0_",i,") - 1")
		formula_step[[i]] = stats::as.formula(formula_step[[i]])
	}

	for (j in 1:maxiter){
		## Step a : fit main model
		fit_a = stats::glm(formula, data=data, family=family, y=FALSE, model=FALSE)
		conv_latent_var = TRUE
		for (i in 1:k){
			if (NCOL(latent_var[[i]])>1){
				# Reparametrizing variables for step i (estimating i-th latent_var)
				data_expanded_withoutlatent_var = stats::model.matrix(formula_withoutlatent_var[[i]], data=data)
				data[,paste0("R0_",i)] = data_expanded_withoutlatent_var%*%stats::coef(fit_a)[!index_with_latent_var[[i]]]
				data_expanded_withlatent_var = stats::model.matrix(formula_withlatent_var[[i]], data=data)
				R1 = data_expanded_withlatent_var%*%stats::coef(fit_a)[index_with_latent_var[[i]]]
				R1_latent_var = latent_var[[i]]*as.vector(R1)
				data[,colnames(latent_var[[i]])]=R1_latent_var

				## Step i-th : fit model for i-th latent_var
				fit_[[i]] = stats::glm(formula_step[[i]], data=data, family=family, y=FALSE, model=FALSE)
				weights_latent_var_ = stats::coef(fit_[[i]])

				# Updating latent_var estimates and checking convergence
				weights_latent_var_old[[i]] = weights_latent_var[[i]]
				weights_latent_var[[i]] = weights_latent_var_/sum(abs(weights_latent_var_))
				data[,names(latent_var)[i]] = latent_var[[i]]%*%weights_latent_var[[i]]
				if(sqrt(sum((weights_latent_var_old[[i]]-weights_latent_var[[i]])^2)) < eps) conv_latent_var = conv_latent_var & TRUE
				else conv_latent_var = FALSE
			}
			else conv_latent_var = conv_latent_var & TRUE
		}
		if (conv_latent_var) break
	}

	# Rerunning last time and scaling to return as results
	fit_a = stats::glm(formula, data=data, family=family, y=FALSE, model=FALSE)

	warn = FALSE
	total_rank = 0
	for (i in 1:k){
		# Reparametrizing variables for step i (estimating i-th latent_var)
		data_expanded_withoutlatent_var = stats::model.matrix(formula_withoutlatent_var[[i]], data=data)
		data[,paste0("R0_",i)] = data_expanded_withoutlatent_var%*%stats::coef(fit_a)[!index_with_latent_var[[i]]]
		data_expanded_withlatent_var = stats::model.matrix(formula_withlatent_var[[i]], data=data)
		R1 = data_expanded_withlatent_var%*%stats::coef(fit_a)[index_with_latent_var[[i]]]
		R1_latent_var = latent_var[[i]]*as.vector(R1)
		data[,colnames(latent_var[[i]])]=R1_latent_var

		fit_[[i]] = stats::glm(formula_step[[i]], data=data, family=family, y=FALSE, model=FALSE)
		data[,colnames(latent_var[[i]])] = data[,colnames(latent_var[[i]])]*sum(abs(stats::coef(fit_[[i]])))
		fit_[[i]] = stats::glm(formula_step[[i]], data=data, family=family, y=FALSE, model=FALSE)
		if (abs(((fit_a$deviance-fit_[[i]]$deviance)/fit_a$deviance))>=.01 && !warn){
			warning("Deviance differs by more than 1% between model parts. Make sure that everything was set up properly and try increasing the number of iterations (maxiter).")
			warn = TRUE
		}
		total_rank = total_rank + fit_[[i]]$rank - 1
	}

	#Change some arguments so that we get the right AIC, BIC and dispersion for the model
	true_aic = fit_a$aic + 2*(total_rank)
	true_rank = fit_a$rank + total_rank
	true_bic = true_aic - 2*true_rank + log(fit_a$df.null+1)*true_rank
	true_df.residual = (fit_a$df.null+1) - true_rank
	true_null.deviance = fit_a$null.deviance

	# print convergences stuff;
	if (conv_latent_var){
		if (print) cat(paste0("Converged in ",j, " iterations\n"))
	} 
	else{
		warning(paste0("Did not reach convergence in maxiter iterations. Try increasing maxiter or make eps smaller."))
	}

	result = list(fit_main = fit_a, fit_latent_var = fit_, true_model_parameters=list(AIC = true_aic, BIC = true_bic, rank = true_rank, df.residual = true_df.residual, null.deviance=true_null.deviance))
	class(result) <- "IMLEGIT"
	return(result)
}

# Predictions from a fitted LEGIT object.
#
# object : list-like fit; object[[1]] is the glm used for prediction, while the
#          coefficients of object[[2]] and object[[3]] are used as the weights
#          of the columns of `genes` and `env` respectively.
# data   : new observations (anything data.frame() accepts).
# genes  : new genetic variants, one column per coefficient of object[[2]].
# env    : new environments, one column per coefficient of object[[3]].
# ...    : forwarded to stats::predict.glm (e.g. type="response").
#
# Returns whatever stats::predict.glm returns for the main model once the
# weighted scores G and E have been added to `data`.
predict.LEGIT = function(object, data, genes, env, ...){
	newdata = data.frame(data)
	genes_matrix = as.matrix(genes)
	env_matrix = as.matrix(env)
	genes_weights = stats::coef(object[[2]])
	env_weights = stats::coef(object[[3]])
	# Weighted sums of the variants/environments; kept as one-column matrices
	# (assigned through `$`) exactly as the products come out of %*%
	newdata$G = genes_matrix%*%genes_weights
	newdata$E = env_matrix%*%env_weights
	stats::predict.glm(object[[1]], newdata=newdata, ...)
}

# Predictions from a fitted IMLEGIT object.
#
# object     : list-like fit; object[[1]] is the glm used for prediction and
#              object[[2]][[i]] is the fit whose coefficients weight the
#              columns of the i-th latent variable.
# data       : new observations (anything data.frame() accepts).
# latent_var : non-empty list with one dataset per latent variable, in the same
#              order as object[[2]]. Its names are used as the column names
#              under which the weighted scores are added to `data`; unnamed
#              lists get the names L1, L2, ...
# ...        : forwarded to stats::predict.glm (e.g. type="response").
#
# Returns whatever stats::predict.glm returns for the main model.
# Stops with an explicit message when latent_var is empty (previously this
# surfaced as an unrelated "subscript out of bounds" error from 1:0).
predict.IMLEGIT = function(object, data, latent_var, ...){
	data = data.frame(data)
	k = length(latent_var)
	if (k==0) stop("latent_var cannot be an empty list")
	# Same conversion as used when fitting: every element becomes a matrix
	for (i in seq_len(k)) latent_var[[i]] = as.matrix(data.frame(latent_var[[i]],fix.empty.names=FALSE))
	if (is.null(names(latent_var))){
		cat("You have not specified names for the latent variables, assigning names to latent_var is highly recommended to prevent confusion. For now, they will be named L1, L2, ...\n")
		names(latent_var) = paste0("L",seq_len(k))
	}
	# Weighted score of each latent variable, stored under the latent variable's name
	for (i in seq_len(k)) data[,names(latent_var)[i]] = latent_var[[i]]%*%stats::coef(object[[2]][[i]])
	stats::predict.glm(object[[1]], newdata=data, ...)
}

# Summary of a LEGIT fit.
#
# Produces one "summary.glm" object for each of the first three elements of
# `object` (object[1:3]). The computation mirrors stats::summary.glm, except
# that aic, rank, df.residual and null.deviance of every part are first
# replaced by the values stored in object$true_model_parameters, so that the
# dispersion and the t-tests use the degrees of freedom of the whole model.
#
# object : fitted object holding the three glm fits followed by
#          true_model_parameters (list with AIC, rank, df.residual,
#          null.deviance).
# ...    : optional dispersion, correlation and symbolic.cor, with the same
#          meaning as in stats::summary.glm; they are applied to every part.
#          Nothing needs to be supplied: the defaults reproduce the plain
#          summary.
#
# Returns a list (same names as object[1:3]) of "summary.glm" objects.
summary.LEGIT = function(object, ...){
	true_par = object$true_model_parameters

	# Summary of one part of the model, using the whole-model parameters
	summarise_part = function(object_current, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...){
		# Using the right values
		object_current$aic = true_par$AIC
		object_current$rank = true_par$rank
		object_current$df.residual = true_par$df.residual
		object_current$null.deviance = true_par$null.deviance

		est.disp = FALSE
		df.r = object_current$df.residual
		if (is.null(dispersion)){
			# Dispersion is fixed at 1 for these families, estimated otherwise
			if (object_current$family$family %in% c("poisson", "binomial")) dispersion = 1
			else{
				est.disp = TRUE
				if (df.r > 0){
					if (any(object_current$weights == 0)) warning("observations with zero weight not used for calculating dispersion")
					dispersion = sum((object_current$weights * object_current$residuals^2)[object_current$weights > 0])/df.r
				}
				else dispersion = NaN
			}
		}

		aliased = is.na(stats::coef(object_current))
		p = object_current$qr$rank
		if (p > 0){
			p1 = 1L:p
			coef.p = object_current$coefficients[object_current$qr$pivot[p1]]
			# Only the leading p x p block of the QR factor belongs to the estimable
			# coefficients; using the whole factor breaks when some are aliased
			covmat.unscaled = chol2inv(object_current$qr$qr[p1, p1, drop = FALSE])
			dimnames(covmat.unscaled) = list(names(coef.p), names(coef.p))
			covmat = dispersion * covmat.unscaled
			s.err = sqrt(diag(covmat))
			tvalue = coef.p/s.err
			dn = c("Estimate", "Std. Error")
			if (!est.disp){
				# Known dispersion: normal reference distribution
				pvalue = 2 * stats::pnorm(-abs(tvalue))
				coef.table = cbind(coef.p, s.err, tvalue, pvalue)
				dimnames(coef.table) = list(names(coef.p), c(dn, "z value", "Pr(>|z|)"))
			}
			else if (df.r > 0){
				# Estimated dispersion: t reference distribution on the true residual df
				pvalue = 2 * stats::pt(-abs(tvalue), df.r)
				coef.table = cbind(coef.p, s.err, tvalue, pvalue)
				dimnames(coef.table) = list(names(coef.p), c(dn, "t value", "Pr(>|t|)"))
			}
			else{
				coef.table = cbind(coef.p, NaN, NaN, NaN)
				dimnames(coef.table) = list(names(coef.p), c(dn, "t value", "Pr(>|t|)"))
			}
			df.f = NCOL(object_current$qr$qr)
		}
		else{
			coef.table = matrix(, 0L, 4L)
			dimnames(coef.table) = list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
			covmat.unscaled = covmat = matrix(, 0L, 0L)
			df.f = length(aliased)
		}

		keep = match(c("call", "terms", "family", "deviance", "aic", "contrasts", "df.residual", "null.deviance", "df.null", "iter", "na.action"), names(object_current), 0L)
		ans = c(object_current[keep], list(deviance.resid = stats::residuals(object_current, type = "deviance"), coefficients = coef.table, aliased = aliased, dispersion = dispersion, df = c(object_current$rank, df.r, df.f), cov.unscaled = covmat.unscaled, cov.scaled = covmat))
		if (correlation && p > 0){
			dd = sqrt(diag(covmat.unscaled))
			ans$correlation = covmat.unscaled/outer(dd, dd)
			ans$symbolic.cor = symbolic.cor
		}
		class(ans) = "summary.glm"
		return(ans)
	}

	lapply(object[1:3], summarise_part, ...)
}

# Summary of an IMLEGIT fit.
#
# Produces one "summary.glm" object for the main model (object$fit_main)
# followed by one for every element of object$fit_latent_var. The computation
# mirrors stats::summary.glm, except that aic, rank, df.residual and
# null.deviance of every part are first replaced by the values stored in
# object$true_model_parameters, so that the dispersion and the t-tests use the
# degrees of freedom of the whole model.
#
# object : fitted object with elements fit_main, fit_latent_var (list of glm
#          fits) and true_model_parameters (list with AIC, rank, df.residual,
#          null.deviance).
# ...    : optional dispersion, correlation and symbolic.cor, with the same
#          meaning as in stats::summary.glm; they are applied to every part.
#          Nothing needs to be supplied: the defaults reproduce the plain
#          summary.
#
# Returns a list of "summary.glm" objects; the first element is named
# fit_main, the following ones are in the order of object$fit_latent_var.
summary.IMLEGIT = function(object, ...){
	true_par = object$true_model_parameters

	# Main model first, then every latent variable model
	newobject = list(fit_main=object$fit_main)
	for (i in seq_along(object$fit_latent_var)) newobject[[i+1]] = object$fit_latent_var[[i]]

	# Summary of one part of the model, using the whole-model parameters
	summarise_part = function(object_current, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...){
		# Using the right values
		object_current$aic = true_par$AIC
		object_current$rank = true_par$rank
		object_current$df.residual = true_par$df.residual
		object_current$null.deviance = true_par$null.deviance

		est.disp = FALSE
		df.r = object_current$df.residual
		if (is.null(dispersion)){
			# Dispersion is fixed at 1 for these families, estimated otherwise
			if (object_current$family$family %in% c("poisson", "binomial")) dispersion = 1
			else{
				est.disp = TRUE
				if (df.r > 0){
					if (any(object_current$weights == 0)) warning("observations with zero weight not used for calculating dispersion")
					dispersion = sum((object_current$weights * object_current$residuals^2)[object_current$weights > 0])/df.r
				}
				else dispersion = NaN
			}
		}

		aliased = is.na(stats::coef(object_current))
		p = object_current$qr$rank
		if (p > 0){
			p1 = 1L:p
			coef.p = object_current$coefficients[object_current$qr$pivot[p1]]
			# Only the leading p x p block of the QR factor belongs to the estimable
			# coefficients; using the whole factor breaks when some are aliased
			covmat.unscaled = chol2inv(object_current$qr$qr[p1, p1, drop = FALSE])
			dimnames(covmat.unscaled) = list(names(coef.p), names(coef.p))
			covmat = dispersion * covmat.unscaled
			s.err = sqrt(diag(covmat))
			tvalue = coef.p/s.err
			dn = c("Estimate", "Std. Error")
			if (!est.disp){
				# Known dispersion: normal reference distribution
				pvalue = 2 * stats::pnorm(-abs(tvalue))
				coef.table = cbind(coef.p, s.err, tvalue, pvalue)
				dimnames(coef.table) = list(names(coef.p), c(dn, "z value", "Pr(>|z|)"))
			}
			else if (df.r > 0){
				# Estimated dispersion: t reference distribution on the true residual df
				pvalue = 2 * stats::pt(-abs(tvalue), df.r)
				coef.table = cbind(coef.p, s.err, tvalue, pvalue)
				dimnames(coef.table) = list(names(coef.p), c(dn, "t value", "Pr(>|t|)"))
			}
			else{
				coef.table = cbind(coef.p, NaN, NaN, NaN)
				dimnames(coef.table) = list(names(coef.p), c(dn, "t value", "Pr(>|t|)"))
			}
			df.f = NCOL(object_current$qr$qr)
		}
		else{
			coef.table = matrix(, 0L, 4L)
			dimnames(coef.table) = list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
			covmat.unscaled = covmat = matrix(, 0L, 0L)
			df.f = length(aliased)
		}

		keep = match(c("call", "terms", "family", "deviance", "aic", "contrasts", "df.residual", "null.deviance", "df.null", "iter", "na.action"), names(object_current), 0L)
		ans = c(object_current[keep], list(deviance.resid = stats::residuals(object_current, type = "deviance"), coefficients = coef.table, aliased = aliased, dispersion = dispersion, df = c(object_current$rank, df.r, df.f), cov.unscaled = covmat.unscaled, cov.scaled = covmat))
		if (correlation && p > 0){
			dd = sqrt(diag(covmat.unscaled))
			ans$correlation = covmat.unscaled/outer(dd, dd)
			ans$symbolic.cor = symbolic.cor
		}
		class(ans) = "summary.glm"
		return(ans)
	}

	lapply(newobject, summarise_part, ...)
}

# Repeated k-fold cross-validation of a LEGIT model.
#
# data           : data.frame (or matrix) with the outcome and the covariates used in formula.
# genes          : matrix/data.frame of genetic variants (one row per observation of data).
# env            : matrix/data.frame of environments (one row per observation of data).
# formula        : model formula, written in terms of the latent scores G and E.
# cv_iter        : number of times the cross-validation is repeated (ignored when folds is given).
# cv_folds       : number of folds of each repetition (ignored when folds is given).
# folds          : optional list with one vector of fold labels per repetition; when given,
#                  the number of repetitions is length(folds).
# Huber_p        : threshold above which the Huber loss becomes linear.
# classification : if TRUE, ROC curve, AUC and best threshold are also computed (pROC).
# start_genes, start_env, eps, maxiter, family : passed as-is to LEGIT for every training fit.
#                  family may be a family function, a family object or its name.
# seed           : if not NULL, set.seed(seed*j) is called at the start of repetition j.
# id             : optional identifiers of the observations (vector, or matrix/data.frame with
#                  one row per observation), used to label the possible outliers.
#
# Returns a list with, per repetition, R2_cv, Huber_cv and L1_cv (plus AUC, best_threshold and
# roc_curve when classification=TRUE; note that best_threshold rows are stacked most recent
# repetition first), and possible_outliers: the observations whose standardized residual or
# standardized Pearson residual, averaged over the repetitions, exceeds 2.5 in absolute value.
LEGIT_cv = function (data, genes, env, formula, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_genes=NULL, start_env=NULL, eps=.001, maxiter=100, family=gaussian, seed=NULL, id=NULL){

	# "id" is reused below for the fold labels, so the observation identifiers are kept under another name
	obs_id = id
	id = NULL

	# getting right formats
	# Retaining only the needed variables from the dataset (need to set G and E variables for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	data$G=0
	data$E=0
	data = stats::model.frame(formula, data=data, na.action=stats::na.pass)
	genes = as.matrix(genes)
	if (is.null(colnames(genes))){
		# This function has no "print" argument: the note is always shown
		cat("You have not specified column names for genes, they will be named gene1, gene2, ...\n")
		colnames(genes) = paste0("gene",1:NCOL(genes))
	}
	env = as.matrix(env)
	if (is.null(colnames(env))){
		cat("You have not specified column names for env, they will be named env1, env2, ...\n")
		colnames(env) = paste0("env",1:NCOL(env))
	}
	formula = stats::as.formula(formula)

	# Error message about factors
	if (sum(apply(data,2,is.numeric)) != NCOL(data) || sum(apply(genes,2,is.numeric)) != NCOL(genes) || sum(apply(env,2,is.numeric)) != NCOL(env)) stop("All variables used must be numeric, factors are not allowed. Please dummy code all categorical variables inside your datasets (data, gene, env)")

	# remove missing data
	comp = stats::complete.cases(data,genes,env)
	data = data[comp,, drop=FALSE]
	genes = genes[comp,, drop=FALSE]
	env = env[comp,, drop=FALSE]
	if (!is.null(obs_id)){
		if (!is.null(dim(obs_id))) obs_id = obs_id[comp,,drop=FALSE]
		else obs_id = obs_id[comp]
	}
	n_obs = NROW(data)
	if (n_obs <= 0) stop("no valid observation without missing values")

	# Family object, needed for the variance function of the Pearson residuals (resolved the same way as stats::glm does)
	family_obj = family
	if (is.character(family_obj)) family_obj = get(family_obj, mode="function", envir=parent.frame())
	if (is.function(family_obj)) family_obj = family_obj()

	formula_outcome = get.vars(formula)[1]
	R2_cv = c()
	Huber_cv = c()
	L1_cv = c()
	AUC = c()
	best_threshold = c()
	roc_curve = list()
	residuals = rep(0,n_obs)
	pearson_residuals = rep(0,n_obs)

	# NOTE(review): fold labels in folds are matched against the rows left after removing missing data - confirm with callers
	if (!is.null(folds)) cv_iter = length(folds)

	for (j in seq_len(cv_iter)){
		if (!is.null(seed)) set.seed(seed*j)
		# Folds
		if (is.null(folds)){
			# Random permutation of the rows, cut into cv_folds consecutive blocks
			s = sample(n_obs)
			id = cut(seq(1,n_obs),breaks=cv_folds,labels=FALSE)
			fold_labels = 1:cv_folds
		}
		else{
			s = 1:n_obs
			id = folds[[j]]
			fold_labels = unique(id)
		}
		data_n = data[s,, drop=FALSE]
		genes_n = genes[s,, drop=FALSE]
		env_n = env[s,, drop=FALSE]
		pred=c()
		y_test=c()
		# Row of "data" each stacked prediction belongs to
		row_index=c()

		for (i in fold_labels){
			# Train and test rows; plain indexing is used (rather than subset) so that a
			# model variable named "id" or "i" cannot be mistaken for the fold labels
			test_rows = which(id == i)
			train_rows = which(id != i)
			data_test = data_n[test_rows,, drop=FALSE]

			# Fit model and add predictions
			fit_train = LEGIT(data=data_n[train_rows,, drop=FALSE], genes=genes_n[train_rows,, drop=FALSE], env=env_n[train_rows,, drop=FALSE], formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
			pred_new = predict(fit_train, data=data_test,genes=genes_n[test_rows,, drop=FALSE],env=env_n[test_rows,, drop=FALSE],type="response")
			pred = c(pred,pred_new)
			y_test = c(y_test, data_test[,formula_outcome])
			row_index = c(row_index, s[test_rows])
		}

		# Cross-validated R2
		ssres = sum((pred-y_test)^2)
		sstotal = sum((y_test-mean(y_test))^2)
		R2_cv = c(R2_cv, 1 - ssres/sstotal)
		# Outlier-resistant cross-validation criterion
		L1_cv = c(L1_cv, sum(abs(pred-y_test))/length(pred))
		Huber_index = abs(pred-y_test) > Huber_p
		Huber_cv_err = (((pred-y_test)^2)/2)
		Huber_cv_err[Huber_index] = (Huber_p*abs(pred-y_test)-(Huber_p^2)/2)[Huber_index]
		Huber_cv = c(Huber_cv, sum(Huber_cv_err)/length(pred))

		#Cross-validated confusion matrix and ROC curve
		if (classification){
			roc_curve_n = pROC::roc(y_test,pred)
			roc_curve = append(roc_curve, list(roc_curve_n))
			AUC = c(AUC, pROC::auc(roc_curve_n))
			best_threshold =  rbind(pROC::coords(roc_curve_n, "best"),best_threshold)
		}

		#Residuals (To detect outliers)
		# The stacked residuals are in test order; row_index sends each of them back to
		# the observation it was computed for
		std_residuals = as.vector(scale(pred-y_test))
		std_pearson_residuals = as.vector(scale((pred-y_test)/sqrt(family_obj$variance(pred))))
		residuals[row_index] = residuals[row_index] + std_residuals
		pearson_residuals[row_index] = pearson_residuals[row_index] + std_pearson_residuals
	}
	residuals = residuals/cv_iter
	pearson_residuals = pearson_residuals/cv_iter

	possible_outliers = abs(residuals)>2.5 | abs(pearson_residuals)>2.5
	if (is.null(obs_id)) possible_outliers_data = cbind(rownames(data)[possible_outliers],residuals[possible_outliers],pearson_residuals[possible_outliers])
	else{
		if (!is.null(dim(obs_id))) obs_id = obs_id[possible_outliers,,drop=FALSE]
		else obs_id = obs_id[possible_outliers]
		possible_outliers_data = cbind(obs_id,residuals[possible_outliers],pearson_residuals[possible_outliers])
	}
	if (NCOL(possible_outliers_data)==3){
		if (is.null(obs_id)) colnames(possible_outliers_data) = c("Observation","Standardized_residual","Standardized_pearson_residual")
		else colnames(possible_outliers_data) = c("ID","Standardized_residual","Standardized_pearson_residual")
	}
	else{
		if (!is.null(colnames(obs_id))) colnames(possible_outliers_data) = c(colnames(obs_id),"Standardized_residual","Standardized_pearson_residual")
		else colnames(possible_outliers_data) = c(rep("ID",NCOL(obs_id)),"Standardized_residual","Standardized_pearson_residual")
	}

	if (classification) return(list(R2_cv = R2_cv, Huber_cv = Huber_cv, L1_cv=L1_cv, AUC=AUC, best_threshold=best_threshold, roc_curve = roc_curve, possible_outliers = possible_outliers_data))
	return(list(R2_cv = R2_cv, Huber_cv = Huber_cv, L1_cv=L1_cv, possible_outliers = possible_outliers_data))
}

# Repeated k-fold cross-validation of an IMLEGIT model.
#
# data             : data.frame (or matrix) with the outcome and the covariates used in formula.
# latent_var       : non-empty list with one dataset per latent variable (same number of rows
#                    as data); unnamed lists get the names L1, L2, ...
# formula          : model formula, written in terms of the names of latent_var.
# cv_iter          : number of times the cross-validation is repeated (ignored when folds is given).
# cv_folds         : number of folds of each repetition (ignored when folds is given).
# folds            : optional list with one vector of fold labels per repetition; when given,
#                    the number of repetitions is length(folds).
# Huber_p          : threshold above which the Huber loss becomes linear.
# classification   : if TRUE, ROC curve, AUC and best threshold are also computed (pROC).
# start_latent_var, eps, maxiter, family : passed as-is to IMLEGIT for every training fit.
#                    family may be a family function, a family object or its name.
# seed             : if not NULL, set.seed(seed*j) is called at the start of repetition j.
# id               : optional identifiers of the observations (vector, or matrix/data.frame with
#                    one row per observation), used to label the possible outliers.
#
# Returns a list with, per repetition, R2_cv, Huber_cv and L1_cv (plus AUC, best_threshold and
# roc_curve when classification=TRUE; note that best_threshold rows are stacked most recent
# repetition first), and possible_outliers: the observations whose standardized residual or
# standardized Pearson residual, averaged over the repetitions, exceeds 2.5 in absolute value.
IMLEGIT_cv = function (data, latent_var, formula, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_latent_var=NULL, eps=.001, maxiter=100, family=gaussian, seed=NULL, id=NULL){

	# "id" is reused below for the fold labels, so the observation identifiers are kept under another name
	obs_id = id
	id = NULL

	# Setting up latent_var and checks
	if (!inherits(latent_var,"list")) stop("latent_var must be a list of datasets")
	k = length(latent_var)
	if (k==0) stop("latent_var cannot be an empty list")
	if (is.null(names(latent_var))){
		cat("You have not specified names for the latent variables, assigning names to latent_var is highly recommended to prevent confusion. For now, they will be named L1, L2, ...\n")
		names(latent_var) = paste0("L",1:k)
	}
	for (i in 1:k){
		latent_var[[i]] = as.matrix(data.frame(latent_var[[i]],fix.empty.names=FALSE))
		if (sum(colnames(latent_var[[i]])=="") > 0){
			# This function has no "print" argument: the note is always shown
			cat(paste0("You have not specified column names for certain elements in ",names(latent_var)[i], ", elements of this latent variable will be named ",names(latent_var)[i],1,", ",names(latent_var)[i],2," ...\n"))
			colnames(latent_var[[i]]) = paste0(names(latent_var)[i],1:NCOL(latent_var[[i]]))
		}
	}

	# More checks
	if (maxiter <= 0) warning("maxiter must be > 0")
	if (k > 1) for (i in 1:(k-1)) if (NROW(latent_var[[i]]) != NROW(latent_var[[i+1]])) stop("Some datasets in latent_var don't have the same number of observations")
	if(!is.null(start_latent_var)){
		if (!inherits(start_latent_var,"list")) stop("start_latent_var must be a list of vectors (or NULL)")
		if (k!=length(start_latent_var)) stop("start_latent_var must have the same size as latent_var")
		for (i in 1:k){
			# NOTE(review): the guard tests latent_var[[i]] (never NULL at this point), not start_latent_var[[i]],
			# so a NULL element of start_latent_var is rejected despite the wording of the message - confirm intent
			if (!is.null(latent_var[[i]])){
				if (NCOL(latent_var[[i]])!=length(start_latent_var[[i]])) stop("All elements of start_latent_var must either be NULL or have the same length as the number of the elements in its associated latent variable")
			}
		}
	}
	# class() has several elements for matrices and for data.frame subclasses, so test with is.*
	if (!is.data.frame(data) && !is.matrix(data)) stop("data must be a data.frame")

	# getting right formats
	# Retaining only the needed variables from the dataset (need to set elements in latent_var for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	for (i in 1:k) data[,names(latent_var)[i]] = 0
	data = stats::model.frame(formula, data=data, na.action=stats::na.pass)
	formula = stats::as.formula(formula)

	# Error message about factors
	if (sum(apply(data,2,is.numeric)) != NCOL(data)) stop("All variables used must be numeric, factors are not allowed. Please dummy code all categorical variables inside your datasets (data, latent_var[[1]], latent_var[[2]], ...)")
	for (i in 1:k) if (sum(apply(latent_var[[i]],2,is.numeric)) != NCOL(latent_var[[i]])) stop("All variables used must be numeric, factors are not allowed. Please dummy code all categorical variables inside your datasets (data, latent_var[[1]], latent_var[[2]], ...)")

	# remove missing data
	comp = stats::complete.cases(data,latent_var[[1]])
	if (k > 1) for (i in 2:k) comp = comp & stats::complete.cases(latent_var[[i]])
	data = data[comp,, drop=FALSE]
	for (i in 1:k) latent_var[[i]] = latent_var[[i]][comp,, drop=FALSE]
	# NOTE(review): unlike LEGIT_cv, the observation identifiers are not filtered with comp here - confirm intent
	n_obs = NROW(data)
	if (n_obs <= 0) stop("no valid observation without missing values")

	# Family object, needed for the variance function of the Pearson residuals (resolved the same way as stats::glm does)
	family_obj = family
	if (is.character(family_obj)) family_obj = get(family_obj, mode="function", envir=parent.frame())
	if (is.function(family_obj)) family_obj = family_obj()

	formula_outcome = get.vars(formula)[1]
	R2_cv = c()
	Huber_cv = c()
	L1_cv = c()
	AUC = c()
	best_threshold = c()
	roc_curve = list()
	residuals = rep(0,n_obs)
	pearson_residuals = rep(0,n_obs)

	# NOTE(review): fold labels in folds are matched against the rows left after removing missing data - confirm with callers
	if (!is.null(folds)) cv_iter = length(folds)

	for (j in seq_len(cv_iter)){
		if (!is.null(seed)) set.seed(seed*j)
		# Folds
		if (is.null(folds)){
			# Random permutation of the rows, cut into cv_folds consecutive blocks
			s = sample(n_obs)
			id = cut(seq(1,n_obs),breaks=cv_folds,labels=FALSE)
			fold_labels = 1:cv_folds
		}
		else{
			s = 1:n_obs
			id = folds[[j]]
			fold_labels = unique(id)
		}
		data_n = data[s,, drop=FALSE]
		latent_var_new = latent_var
		for (l in 1:k) latent_var_new[[l]] = latent_var_new[[l]][s,, drop=FALSE]
		pred=c()
		y_test=c()
		# Row of "data" each stacked prediction belongs to
		row_index=c()

		for (i in fold_labels){
			# Train and test rows; plain indexing is used (rather than subset) so that a
			# model variable named "id" or "i" cannot be mistaken for the fold labels
			test_rows = which(id == i)
			train_rows = which(id != i)
			data_train = data_n[train_rows,, drop=FALSE]
			data_test = data_n[test_rows,, drop=FALSE]
			latent_var_train = latent_var_new
			latent_var_test = latent_var_new
			for (l in 1:k){
				latent_var_train[[l]] = latent_var_new[[l]][train_rows,, drop=FALSE]
				latent_var_test[[l]] = latent_var_new[[l]][test_rows,, drop=FALSE]
			}

			# Fit model and add predictions
			fit_train = IMLEGIT(data=data_train, latent_var=latent_var_train, formula=formula, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, print=FALSE)
			pred_new = predict(fit_train, data=data_test,latent_var=latent_var_test,type="response")
			pred = c(pred,pred_new)
			y_test = c(y_test, data_test[,formula_outcome])
			row_index = c(row_index, s[test_rows])
		}

		# Cross-validated R2
		ssres = sum((pred-y_test)^2)
		sstotal = sum((y_test-mean(y_test))^2)
		R2_cv = c(R2_cv, 1 - ssres/sstotal)
		# Outlier-resistant cross-validation criterion
		L1_cv = c(L1_cv, sum(abs(pred-y_test))/length(pred))
		Huber_index = abs(pred-y_test) > Huber_p
		Huber_cv_err = (((pred-y_test)^2)/2)
		Huber_cv_err[Huber_index] = (Huber_p*abs(pred-y_test)-(Huber_p^2)/2)[Huber_index]
		Huber_cv = c(Huber_cv, sum(Huber_cv_err)/length(pred))

		#Cross-validated confusion matrix and ROC curve
		if (classification){
			roc_curve_n = pROC::roc(y_test,pred)
			roc_curve = append(roc_curve, list(roc_curve_n))
			AUC = c(AUC, pROC::auc(roc_curve_n))
			best_threshold =  rbind(pROC::coords(roc_curve_n, "best"),best_threshold)
		}

		#Residuals (To detect outliers)
		# The stacked residuals are in test order; row_index sends each of them back to
		# the observation it was computed for
		std_residuals = as.vector(scale(pred-y_test))
		std_pearson_residuals = as.vector(scale((pred-y_test)/sqrt(family_obj$variance(pred))))
		residuals[row_index] = residuals[row_index] + std_residuals
		pearson_residuals[row_index] = pearson_residuals[row_index] + std_pearson_residuals
	}
	residuals = residuals/cv_iter
	pearson_residuals = pearson_residuals/cv_iter

	possible_outliers = abs(residuals)>2.5 | abs(pearson_residuals)>2.5
	if (is.null(obs_id)) possible_outliers_data = cbind(rownames(data)[possible_outliers],residuals[possible_outliers],pearson_residuals[possible_outliers])
	else{
		if (!is.null(dim(obs_id))) obs_id = obs_id[possible_outliers,,drop=FALSE]
		else obs_id = obs_id[possible_outliers]
		possible_outliers_data = cbind(obs_id,residuals[possible_outliers],pearson_residuals[possible_outliers])
	}
	if (NCOL(possible_outliers_data)==3){
		if (is.null(obs_id)) colnames(possible_outliers_data) = c("Observation","Standardized_residual","Standardized_pearson_residual")
		else colnames(possible_outliers_data) = c("ID","Standardized_residual","Standardized_pearson_residual")
	}
	else{
		if (!is.null(colnames(obs_id))) colnames(possible_outliers_data) = c(colnames(obs_id),"Standardized_residual","Standardized_pearson_residual")
		else colnames(possible_outliers_data) = c(rep("ID",NCOL(obs_id)),"Standardized_residual","Standardized_pearson_residual")
	}

	if (classification) return(list(R2_cv = R2_cv, Huber_cv = Huber_cv, L1_cv=L1_cv, AUC=AUC, best_threshold=best_threshold, roc_curve = roc_curve, possible_outliers = possible_outliers_data))
	return(list(R2_cv = R2_cv, Huber_cv = Huber_cv, L1_cv=L1_cv, possible_outliers = possible_outliers_data))
}

forward_step = function(empty_start_dataset, fit, data, formula, interactive_mode=FALSE, genes_current=NULL, env_current=NULL, genes_toadd=NULL, env_toadd=NULL, search="genes", search_criterion="AIC", p_threshold = .20, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_genes=NULL, start_env=NULL, eps=.01, maxiter=25, family=gaussian, seed=NULL, print=TRUE){
	# How much genes or env to add
	if (search=="genes") elements_N = NCOL(genes_toadd)
	if (search=="env") elements_N = NCOL(env_toadd)
	if (elements_N == 0){
		if (search=="genes" && print) cat("No gene added\n")
		if (search=="env" && print) cat("No environment added\n")
		return(NULL)
	}
	# Vector which says which extra variables are "good" (worth exploring)
	good = rep(TRUE, elements_N)
	# Vector which says how much the criterion changed from including the variable
	criterion_before = rep(NA, elements_N)
	criterion_after = rep(NA, elements_N)
	criterion_diff = rep(NA, elements_N)
	# In interactive model, we must keep track of every AIC, BIC, p-value, Cross-validated R2 and AUC to show the user at every iteration
	if (interactive_mode){
		if (search=="genes") interactive = data.frame(variable=colnames(genes_toadd), N_old=rep(NA, elements_N), N_new=rep(NA, elements_N), p_value=rep(NA, elements_N),AIC_old=rep(NA, elements_N),AIC_new=rep(NA, elements_N),BIC_old=rep(NA, elements_N),BIC_new=rep(NA, elements_N),cv_R2_old=rep(NA, elements_N),cv_R2_new=rep(NA, elements_N),cv_AUC_old=rep(NA, elements_N),cv_AUC_new=rep(NA, elements_N),cv_Huber_old=rep(NA, elements_N),cv_Huber_new=rep(NA, elements_N),cv_L1_old=rep(NA, elements_N),cv_L1_new=rep(NA, elements_N))
		if (search=="env") interactive = data.frame(variable=colnames(env_toadd), N_old=rep(NA, elements_N), N_new=rep(NA, elements_N), p_value=rep(NA, elements_N),AIC_old=rep(NA, elements_N),AIC_new=rep(NA, elements_N),BIC_old=rep(NA, elements_N),BIC_new=rep(NA, elements_N),cv_R2_old=rep(NA, elements_N),cv_R2_new=rep(NA, elements_N),cv_AUC_old=rep(NA, elements_N),cv_AUC_new=rep(NA, elements_N),cv_Huber_old=rep(NA, elements_N),cv_Huber_new=rep(NA, elements_N),cv_L1_old=rep(NA, elements_N),cv_L1_new=rep(NA, elements_N))
	}
	# Complete dataset
	if (NCOL(genes_current)==0) genes_current_nomiss = NULL
	else genes_current_nomiss = genes_current
	if (NCOL(env_current)==0) env_current_nomiss = NULL
	else env_current_nomiss = env_current
	comp_without = stats::complete.cases(data,genes_current_nomiss,env_current_nomiss)
	# Non-cross-validated models
	for (j in 1:elements_N){
		if (search=="genes") fit_with = LEGIT(data=data, genes=cbind(genes_current,genes_toadd[,j,drop=FALSE]), env=env_current, formula=formula, start_genes=c(start_genes,0), start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
		if (search=="env") fit_with = LEGIT(data=data, genes=genes_current, env=cbind(env_current,env_toadd[,j,drop=FALSE]), formula=formula, start_genes=start_genes, start_env=c(start_env,0), eps=eps, maxiter=maxiter, family=family, print=FALSE)
		if (search=="genes"){
			p_value = stats::coef(summary(fit_with)$fit_genes)[,4]
			good[j] = p_value[length(p_value)] <= p_threshold
		}
		if (search=="env"){
			p_value = stats::coef(summary(fit_with)$fit_env)[,4]
			good[j] = p_value[length(p_value)] <= p_threshold
		}
		if (empty_start_dataset) fit_without = NULL
		else if (fit$fit_main$df.null != fit_with$fit_main$df.null){
			if (search=="genes") comp = stats::complete.cases(data,genes_current,genes_toadd[,j,drop=FALSE],env_current)
			if (search=="env") comp = stats::complete.cases(data,genes_current,env_toadd[,j,drop=FALSE],env_current)
			fit_without = LEGIT(data=data[comp,,drop=FALSE], genes=genes_current[comp,,drop=FALSE], env=env_current[comp,,drop=FALSE], formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
		}
		else fit_without = fit

		if (exclude_worse_AIC && !empty_start_dataset){
			if (fit_with$true_model_parameters$AIC <= fit_without$true_model_parameters$AIC) good[j]=good[j] && TRUE
		}
		if (search_criterion=="AIC"){
			if (empty_start_dataset) criterion_before[j] = Inf
			else criterion_before[j] = fit_without$true_model_parameters$AIC
			criterion_after[j] = fit_with$true_model_parameters$AIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		if (search_criterion=="BIC"){
			if (empty_start_dataset) criterion_before[j] = Inf
			else criterion_before[j] = fit_without$true_model_parameters$BIC
			criterion_after[j] = fit_with$true_model_parameters$BIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		} 
		# Keep p-value, AIC and BIC if in interactive model
		if (interactive_mode){
			if (empty_start_dataset) interactive$N_old[j] = NA
			else interactive$N_old[j] = sum(comp_without)
			interactive$N_new[j] = fit_with$true_model_parameters$df.residual + fit_with$true_model_parameters$rank

			interactive$p_value[j] = round(p_value[length(p_value)],6)

			if (empty_start_dataset) interactive$AIC_old[j] = Inf
			else interactive$AIC_old[j] = fit_without$true_model_parameters$AIC
			interactive$AIC_new[j] = fit_with$true_model_parameters$AIC

			if (empty_start_dataset) interactive$BIC_old[j] = Inf
			else interactive$BIC_old[j] = fit_without$true_model_parameters$BIC
			interactive$BIC_new[j] = fit_with$true_model_parameters$BIC
		}
	}
	if (sum(good)==0){
		if (search=="genes" && print) cat("No gene added\n")
		if (search=="env" && print) cat("No environment added\n")
		return(NULL)
	}
	# Only do this if NOT in interactive mode, otherwise at the end of the algorithm, we show the the data.frame interactive and let the user choose
	if (!interactive_mode){
		if (search_criterion=="AIC" || search_criterion=="BIC"){
			if (min(criterion_diff,na.rm=TRUE) > 0){
				if (search=="genes" && print) cat("No gene added\n")
				if (search=="env" && print) cat("No environment added\n")
				return(NULL)
			}
			if (empty_start_dataset) best_var = which.min(criterion_after)
			else best_var = which.min(criterion_diff)
			if (search=="genes"){
				if (print) cat(paste0("Adding gene: ",colnames(genes_toadd)[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
				genes_current = cbind(genes_current, genes_toadd[,best_var, drop=FALSE])
				genes_toadd = genes_toadd[,-best_var, drop=FALSE]
			}
			if (search=="env"){
				if (print) cat(paste0("Adding environment: ",colnames(env_toadd)[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
				env_current = cbind(env_current, env_toadd[,best_var, drop=FALSE])
				env_toadd = env_toadd[,-best_var, drop=FALSE]						
			}
		}
	}
	# Cross-validated models
	if (search_criterion == "cv" || search_criterion == "cv_AUC" || search_criterion=="cv_Huber" || search_criterion=="cv_L1"){
		# Dropping variables with p < threshold and worse AIC
		if (interactive_mode) interactive = interactive[good,]
		if (search=="genes") genes_toadd = genes_toadd[,good, drop=FALSE]
		if (search=="env") env_toadd = env_toadd[,good, drop=FALSE]
		if (search=="genes") elements_N = NCOL(genes_toadd)
		if (search=="env") elements_N = NCOL(env_toadd)
		# Vector which says how much the criterion changed from including the variable
		criterion_before = rep(NA, elements_N)
		criterion_after = rep(NA, elements_N)
		criterion_diff = rep(NA, elements_N)
		# Set seed
		if (!is.null(seed)) current_seed = seed
		else current_seed = NULL
		if (!empty_start_dataset) fit_cv = LEGIT_cv(data=data, genes=genes_current, env=env_current, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
		for (j in 1:elements_N){
			if (search=="genes") fit_cv_with = LEGIT_cv(data=data, genes=cbind(genes_current,genes_toadd[,j,drop=FALSE]), env=env_current, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=c(start_genes,0), start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
			if (search=="env") fit_cv_with = LEGIT_cv(data=data, genes=genes_current, env=cbind(env_current,env_toadd[,j,drop=FALSE]), formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=c(start_env,0), eps=eps, maxiter=maxiter, family=family, seed=current_seed)
			if (empty_start_dataset) fit_cv_without = NULL
			else{
				if (search=="genes") comp_with = stats::complete.cases(data,genes_current,genes_toadd[,j,drop=FALSE],env_current)
				if (search=="env") comp_with = stats::complete.cases(data,genes_current,env_toadd[,j,drop=FALSE],env_current)
				if (sum(comp_without) != sum(comp_with)) fit_cv_without = LEGIT_cv(data=data[comp_with,,drop=FALSE], genes=genes_current[comp_with,,drop=FALSE], env=env_current[comp_with,,drop=FALSE], formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
				else fit_cv_without = fit_cv
			}
			if (search_criterion=="cv"){
				if (empty_start_dataset) criterion_before[j] = 0
				else criterion_before[j] = mean(fit_cv_without$R2_cv)
				criterion_after[j] = mean(fit_cv_with$R2_cv)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			} 
			if (search_criterion=="cv_Huber"){
				if (empty_start_dataset) criterion_before[j] = Inf
				else criterion_before[j] = mean(fit_cv_without$Huber_cv)
				criterion_after[j] = mean(fit_cv_with$Huber_cv)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			} 
			if (search_criterion=="cv_L1"){
				if (empty_start_dataset) criterion_before[j] = Inf
				else criterion_before[j] = mean(fit_cv_without$L1_cv)
				criterion_after[j] = mean(fit_cv_with$L1_cv)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			} 
			if (search_criterion=="cv_AUC"){
				if (empty_start_dataset) criterion_before[j] = 0
				else criterion_before[j] = mean(fit_cv_without$AUC)
				criterion_after[j] = mean(fit_cv_with$AUC)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			}
			# Keep cross-validation R2 and AUC in interactive model
			if (interactive_mode){
				if (empty_start_dataset) interactive$cv_R2_old[j] = 0
				else interactive$cv_R2_old[j] = mean(fit_cv_without$R2_cv)
				interactive$cv_R2_new[j] = mean(fit_cv_with$R2_cv)

				if (empty_start_dataset) interactive$cv_L1_old[j] = Inf
				else interactive$cv_L1_old[j] = mean(fit_cv_without$L1_cv)
				interactive$cv_L1_new[j] = mean(fit_cv_with$L1_cv)

				if (empty_start_dataset) interactive$cv_Huber_old[j] = Inf
				else interactive$cv_Huber_old[j] = mean(fit_cv_without$Huber_cv)
				interactive$cv_Huber_new[j] = mean(fit_cv_with$Huber_cv)

				if(classification){
					if (empty_start_dataset) interactive$cv_AUC_old[j] = 0
					else interactive$cv_AUC_old[j] = mean(fit_cv_without$AUC)
					interactive$cv_AUC_new[j] = mean(fit_cv_with$AUC)
				}
			}
		}
		# Only do this if NOT in interactive mode, otherwise at the end of the algorithm, we show the the data.frame interactive and let the user choose
		if (!interactive_mode){
			if ((max(criterion_diff,na.rm=TRUE) < 0 && !(search_criterion=="cv_Huber" || search_criterion=="cv_L1")) || (min(criterion_diff,na.rm=TRUE) > 0 && (search_criterion=="cv_Huber" || search_criterion=="cv_L1"))){
				if (search=="genes" && print) cat("No gene added\n")
				if (search=="env" && print) cat("No environment added\n")
				return(NULL)
			}
			if (search_criterion=="cv_Huber" || search_criterion=="cv_L1") best_var = which.min(criterion_diff)
			else best_var = which.max(criterion_diff)
			if (search=="genes"){
				if (print) cat(paste0("Adding gene: ",colnames(genes_toadd)[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
				genes_current = cbind(genes_current, genes_toadd[,best_var, drop=FALSE])
				genes_toadd = genes_toadd[,-best_var, drop=FALSE]
			}
			if (search=="env"){
				if (print) cat(paste0("Adding environment: ",colnames(env_toadd)[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
				env_current = cbind(env_current, env_toadd[,best_var, drop=FALSE])
				env_toadd = env_toadd[,-best_var, drop=FALSE]				
			}
		}
	}
	if (interactive_mode){
		interactive_n = min(5,elements_N)
		if (search_criterion=="AIC" && empty_start_dataset) neworder = order(interactive$AIC_new, decreasing=FALSE)
		if (search_criterion=="AIC" && !empty_start_dataset) neworder = order(interactive$AIC_new - interactive$AIC_old, decreasing=FALSE)
		if (search_criterion=="BIC" && empty_start_dataset) neworder = order(interactive$BIC_new, decreasing=FALSE)
		if (search_criterion=="BIC" && !empty_start_dataset) neworder = order(interactive$BIC_new - interactive$BIC_old, decreasing=FALSE)
		if (search_criterion=="cv" && empty_start_dataset) neworder = order(interactive$cv_R2_new, decreasing=TRUE)
		if (search_criterion=="cv" && !empty_start_dataset) neworder = order(interactive$cv_R2_new - interactive$cv_R2_old, decreasing=TRUE)
		if (search_criterion=="cv_Huber" && empty_start_dataset) neworder = order(interactive$cv_Huber_new, decreasing=FALSE)
		if (search_criterion=="cv_Huber" && !empty_start_dataset) neworder = order(interactive$cv_Huber_new - interactive$cv_Huber_old, decreasing=FALSE)
		if (search_criterion=="cv_L1" && empty_start_dataset) neworder = order(interactive$cv_L1_new, decreasing=FALSE)
		if (search_criterion=="cv_L1" && !empty_start_dataset) neworder = order(interactive$cv_L1_new - interactive$cv_L1_old, decreasing=FALSE)
		if (search_criterion=="cv_AUC" && empty_start_dataset) neworder = order(interactive$cv_AUC_new, decreasing=TRUE)
		if (search_criterion=="cv_AUC" && !empty_start_dataset) neworder = order(interactive$cv_AUC_new - interactive$cv_AUC_old, decreasing=TRUE)
		interactive = interactive[neworder[1:interactive_n],]
		rownames(interactive)=1:interactive_n
		interactive = format(interactive,scientific=FALSE)
		print(interactive)
		if (search=="genes") input_user = readline(prompt="Enter the index of the gene to be added: ")
		if (search=="env") input_user = readline(prompt="Enter the index of the environment to be added: ")
		if (sum(input_user == rownames(interactive))==0){
			if (search=="genes" && print) cat("No gene added\n")
			if (search=="env" && print) cat("No environment added\n")
			return(NULL)
		}
		best_var = neworder[1:interactive_n][input_user == rownames(interactive)]
		if (search=="genes"){
			if (print) cat(paste0("Adding gene: ",colnames(genes_toadd)[best_var],"\n"))
			genes_current = cbind(genes_current, genes_toadd[,best_var, drop=FALSE])
			genes_toadd = genes_toadd[,-best_var, drop=FALSE]
		}
		if (search=="env"){
			if (print) cat(paste0("Adding environment: ",colnames(env_toadd)[best_var],"\n"))
			env_current = cbind(env_current, env_toadd[,best_var, drop=FALSE])
			env_toadd = env_toadd[,-best_var, drop=FALSE]				
		}
	}
	# Updated model and coefficients
	if (search=="genes") start_genes=c(start_genes,0)
	if (search=="env") start_env=c(start_env,0)
	fit = LEGIT(data=data, genes=genes_current, env=env_current, formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
	start_genes = stats::coef(fit$fit_genes)
	start_env = stats::coef(fit$fit_env)
	if (search=="genes") return(list(fit=fit, start_genes=start_genes,start_env=start_env,genes_current=genes_current,genes_toadd=genes_toadd))
	if (search=="env") return(list(fit=fit, start_genes=start_genes,start_env=start_env,env_current=env_current,env_toadd=env_toadd))
}

# One forward step of the stepwise search for IMLEGIT models: tries adding each
# candidate column of latent_var_toadd[[search]] to latent_var_current[[search]],
# scores the candidates, adds one of them (the best one, or the one chosen by the
# user in interactive mode) and refits the model.
#
# Arguments:
#   empty_start_dataset  If TRUE there is no baseline model to compare with: the "before"
#                        criterion is set to Inf (AIC, BIC, Huber, L1) or 0 (R2, AUC).
#   fit                  Current fitted model; reused as the baseline whenever adding the
#                        candidate does not change fit_main$df.null.
#   data, formula        Passed as-is to IMLEGIT and IMLEGIT_cv.
#   interactive_mode     If TRUE, a table of (at most 5) best candidates is printed and the
#                        user picks the one to add through readline.
#   latent_var_current   List with, for each latent variable, the columns currently in the model.
#   latent_var_toadd     List with, for each latent variable, the candidate columns.
#   search               Index of the latent variable being extended. It must be numeric
#                        because summary(fit)[[search+1]] is used to reach its coefficients.
#   search_criterion     "AIC", "BIC", "cv", "cv_AUC", "cv_Huber" or "cv_L1".
#   p_threshold          A candidate is only "good" if the p-value of its coefficient is <= p_threshold.
#   exclude_worse_AIC    If TRUE (and there is a baseline), a candidate that leads to a higher AIC
#                        than the baseline is not "good".
#   max_steps            Not used in this function.
#   cv_iter, cv_folds, folds, Huber_p, classification
#                        Passed to IMLEGIT_cv; classification also decides whether the AUC is
#                        stored in the interactive table.
#   start_latent_var     List of starting coefficients (one vector per latent variable).
#                        The default is self-referential, so it must always be supplied.
#   eps, maxiter, family Passed to IMLEGIT and IMLEGIT_cv.
#   seed                 Passed to IMLEGIT_cv.
#   print                If TRUE, messages about what was (or was not) added are shown.
#
# Returns NULL when nothing is added; otherwise a list with the refitted model (fit), the
# updated starting coefficients (start_latent_var) and the updated latent_var_current and
# latent_var_toadd.
forward_step_IM = function(empty_start_dataset, fit, data, formula, interactive_mode=FALSE, latent_var_current=NULL, latent_var_toadd=NULL, search=NULL, search_criterion="AIC", p_threshold = .20, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_latent_var=start_latent_var, eps=.01, maxiter=25, family=gaussian, seed=NULL, print=TRUE){
	k = length(latent_var_current)
	# Message shown every time the step ends without adding anything
	say_none_added = function(){
		if (print) cat(paste0("No element from ", names(latent_var_current)[search]," was added\n"))
	}
	# Empty sets of variables (NULL or zero columns) carry no information about missingness
	nonempty_or_null = function(x){
		if (is.null(x) || NCOL(x)==0) NULL
		else x
	}
	# How much genes or env to add
	elements_N = NCOL(latent_var_toadd[[search]])
	if (elements_N == 0){
		say_none_added()
		return(NULL)
	}
	# Vector which says which extra variables are "good" (worth exploring)
	good = rep(TRUE, elements_N)
	# Vector which says how much the criterion changed from including the variable
	criterion_before = rep(NA, elements_N)
	criterion_after = rep(NA, elements_N)
	criterion_diff = rep(NA, elements_N)
	# In interactive model, we must keep track of every AIC, BIC, p-value, Cross-validated R2 and AUC to show the user at every iteration
	if (interactive_mode) interactive = data.frame(variable=colnames(latent_var_toadd[[search]]), N_old=rep(NA, elements_N), N_new=rep(NA, elements_N), p_value=rep(NA, elements_N),AIC_old=rep(NA, elements_N),AIC_new=rep(NA, elements_N),BIC_old=rep(NA, elements_N),BIC_new=rep(NA, elements_N),cv_R2_old=rep(NA, elements_N),cv_R2_new=rep(NA, elements_N),cv_AUC_old=rep(NA, elements_N),cv_AUC_new=rep(NA, elements_N),cv_Huber_old=rep(NA, elements_N),cv_Huber_new=rep(NA, elements_N),cv_L1_old=rep(NA, elements_N),cv_L1_new=rep(NA, elements_N))
	# Complete dataset (observations with no missing data in the current model)
	comp_without = stats::complete.cases(data, nonempty_or_null(latent_var_current[[1]]))
	if (k > 1) for (i in 2:k){
		# complete.cases fails when given nothing but NULL, so empty sets are skipped
		current_i = nonempty_or_null(latent_var_current[[i]])
		if (!is.null(current_i)) comp_without = comp_without & stats::complete.cases(current_i)
	}

	# Non-cross-validated models
	for (j in seq_len(elements_N)){
		# Running IMLEGIT with new variable
		candidate = latent_var_toadd[[search]][,j,drop=FALSE]
		latent_var_new = latent_var_current
		if (is.null(latent_var_current[[search]])) latent_var_new[[search]] = candidate
		else latent_var_new[[search]] = cbind(latent_var_current[[search]],candidate)
		start_latent_var_new = start_latent_var
		start_latent_var_new[[search]] = c(start_latent_var[[search]],0)
		fit_with = IMLEGIT(data=data, latent_var=latent_var_new, formula=formula, start_latent_var=start_latent_var_new, eps=eps, maxiter=maxiter, family=family, print=FALSE)
		# p-values (the new variable is always the last coefficient)
		p_value = stats::coef(summary(fit_with)[[search+1]])[,4]
		good[j] = p_value[length(p_value)] <= p_threshold
		if (empty_start_dataset) fit_without = NULL
		else if (fit$fit_main$df.null != fit_with$fit_main$df.null){
			# Removing observations missing the new variable and rerunnning model without the variable (Note : with and without is confusing here)
			comp_with = comp_without & stats::complete.cases(candidate)
			data_with = data[comp_with,, drop=FALSE]
			latent_var_with = latent_var_current
			for (i in 1:k) latent_var_with[[i]] = latent_var_current[[i]][comp_with,, drop=FALSE]
			fit_without = IMLEGIT(data=data_with, latent_var=latent_var_with, formula=formula, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, print=FALSE)
		}
		else fit_without = fit

		# A variable which makes the AIC worse is not worth exploring
		if (exclude_worse_AIC && !empty_start_dataset){
			if (fit_with$true_model_parameters$AIC > fit_without$true_model_parameters$AIC) good[j] = FALSE
		}
		if (search_criterion=="AIC"){
			if (empty_start_dataset) criterion_before[j] = Inf
			else criterion_before[j] = fit_without$true_model_parameters$AIC
			criterion_after[j] = fit_with$true_model_parameters$AIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		if (search_criterion=="BIC"){
			if (empty_start_dataset) criterion_before[j] = Inf
			else criterion_before[j] = fit_without$true_model_parameters$BIC
			criterion_after[j] = fit_with$true_model_parameters$BIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		# Keep p-value, AIC and BIC if in interactive model
		if (interactive_mode){
			if (empty_start_dataset) interactive$N_old[j] = NA
			else interactive$N_old[j] = sum(comp_without)
			interactive$N_new[j] = fit_with$true_model_parameters$df.residual + fit_with$true_model_parameters$rank

			interactive$p_value[j] = round(p_value[length(p_value)],6)

			if (empty_start_dataset) interactive$AIC_old[j] = Inf
			else interactive$AIC_old[j] = fit_without$true_model_parameters$AIC
			interactive$AIC_new[j] = fit_with$true_model_parameters$AIC

			if (empty_start_dataset) interactive$BIC_old[j] = Inf
			else interactive$BIC_old[j] = fit_without$true_model_parameters$BIC
			interactive$BIC_new[j] = fit_with$true_model_parameters$BIC
		}
	}
	if (sum(good)==0){
		say_none_added()
		return(NULL)
	}
	# Only do this if NOT in interactive mode, otherwise at the end of the algorithm, we show the the data.frame interactive and let the user choose
	# NOTE(review): with AIC/BIC the best variable is picked among all candidates, "good" or not - confirm this is intended
	if (!interactive_mode){
		if (search_criterion=="AIC" || search_criterion=="BIC"){
			if (min(criterion_diff,na.rm=TRUE) > 0){
				say_none_added()
				return(NULL)
			}
			if (empty_start_dataset) best_var = which.min(criterion_after)
			else best_var = which.min(criterion_diff)
			if (print) cat(paste0("Adding element from ", names(latent_var_current)[search], ": ",colnames(latent_var_toadd[[search]])[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
			latent_var_current[[search]] = cbind(latent_var_current[[search]], latent_var_toadd[[search]][,best_var, drop=FALSE])
			latent_var_toadd[[search]] = latent_var_toadd[[search]][,-best_var, drop=FALSE]
		}
	}
	# Cross-validated models
	if (search_criterion == "cv" || search_criterion == "cv_AUC" || search_criterion=="cv_Huber" || search_criterion=="cv_L1"){
		# Dropping variables with p > threshold and worse AIC
		if (interactive_mode) interactive = interactive[good,]
		# Only the searched element of the list is filtered; the list itself must remain a list
		latent_var_toadd[[search]] = latent_var_toadd[[search]][,good, drop=FALSE]
		elements_N = NCOL(latent_var_toadd[[search]])
		# Vector which says how much the criterion changed from including the variable
		criterion_before = rep(NA, elements_N)
		criterion_after = rep(NA, elements_N)
		criterion_diff = rep(NA, elements_N)
		# Same seed for every cross-validation so that the candidates are compared on the same footing
		current_seed = seed
		if (!empty_start_dataset) fit_cv = IMLEGIT_cv(data=data, latent_var=latent_var_current, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
		for (j in seq_len(elements_N)){
			# Running IMLEGIT with new variable
			candidate = latent_var_toadd[[search]][,j,drop=FALSE]
			latent_var_new = latent_var_current
			if (is.null(latent_var_current[[search]])) latent_var_new[[search]] = candidate
			else latent_var_new[[search]] = cbind(latent_var_current[[search]],candidate)
			start_latent_var_new = start_latent_var
			start_latent_var_new[[search]] = c(start_latent_var[[search]],0)
			fit_cv_with = IMLEGIT_cv(data=data, latent_var=latent_var_new, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start_latent_var_new, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
			if (empty_start_dataset) fit_cv_without = NULL
			else{
				# If new variable has new missing data: Remove observations missing the new variable and rerun model without the variable (Note : with and without is confusing here)
				comp_with = comp_without & stats::complete.cases(candidate)
				if (sum(comp_without) != sum(comp_with)){
					data_with = data[comp_with,, drop=FALSE]
					latent_var_with = latent_var_current
					for (i in 1:k) latent_var_with[[i]] = latent_var_current[[i]][comp_with,, drop=FALSE]
					fit_cv_without = IMLEGIT_cv(data=data_with, latent_var=latent_var_with, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
				}
				else fit_cv_without = fit_cv
			}
			if (search_criterion=="cv"){
				if (empty_start_dataset) criterion_before[j] = 0
				else criterion_before[j] = mean(fit_cv_without$R2_cv)
				criterion_after[j] = mean(fit_cv_with$R2_cv)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			}
			if (search_criterion=="cv_Huber"){
				if (empty_start_dataset) criterion_before[j] = Inf
				else criterion_before[j] = mean(fit_cv_without$Huber_cv)
				criterion_after[j] = mean(fit_cv_with$Huber_cv)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			}
			if (search_criterion=="cv_L1"){
				if (empty_start_dataset) criterion_before[j] = Inf
				else criterion_before[j] = mean(fit_cv_without$L1_cv)
				criterion_after[j] = mean(fit_cv_with$L1_cv)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			}
			if (search_criterion=="cv_AUC"){
				if (empty_start_dataset) criterion_before[j] = 0
				else criterion_before[j] = mean(fit_cv_without$AUC)
				criterion_after[j] = mean(fit_cv_with$AUC)
				criterion_diff[j] = criterion_after[j] - criterion_before[j]
			}
			# Keep cross-validation R2, L1, Huber and AUC in interactive model
			if (interactive_mode){
				if (empty_start_dataset) interactive$cv_R2_old[j] = 0
				else interactive$cv_R2_old[j] = mean(fit_cv_without$R2_cv)
				interactive$cv_R2_new[j] = mean(fit_cv_with$R2_cv)

				if (empty_start_dataset) interactive$cv_L1_old[j] = Inf
				else interactive$cv_L1_old[j] = mean(fit_cv_without$L1_cv)
				interactive$cv_L1_new[j] = mean(fit_cv_with$L1_cv)

				if (empty_start_dataset) interactive$cv_Huber_old[j] = Inf
				else interactive$cv_Huber_old[j] = mean(fit_cv_without$Huber_cv)
				interactive$cv_Huber_new[j] = mean(fit_cv_with$Huber_cv)

				if(classification){
					if (empty_start_dataset) interactive$cv_AUC_old[j] = 0
					else interactive$cv_AUC_old[j] = mean(fit_cv_without$AUC)
					interactive$cv_AUC_new[j] = mean(fit_cv_with$AUC)
				}
			}
		}
		# Only do this if NOT in interactive mode, otherwise at the end of the algorithm, we show the the data.frame interactive and let the user choose
		if (!interactive_mode){
			# Huber and L1 are errors (lower is better), R2 and AUC are scores (higher is better)
			lower_is_better = (search_criterion=="cv_Huber" || search_criterion=="cv_L1")
			if ((max(criterion_diff,na.rm=TRUE) < 0 && !lower_is_better) || (min(criterion_diff,na.rm=TRUE) > 0 && lower_is_better)){
				say_none_added()
				return(NULL)
			}
			if (lower_is_better) best_var = which.min(criterion_diff)
			else best_var = which.max(criterion_diff)
			if (print) cat(paste0("Adding element from ", names(latent_var_current)[search], ": ",colnames(latent_var_toadd[[search]])[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
			latent_var_current[[search]] = cbind(latent_var_current[[search]], latent_var_toadd[[search]][,best_var, drop=FALSE])
			latent_var_toadd[[search]] = latent_var_toadd[[search]][,-best_var, drop=FALSE]
		}
	}
	if (interactive_mode){
		interactive_n = min(5,elements_N)
		# Columns of the interactive table holding the chosen criterion (prefix + "_old"/"_new")
		criterion_column = switch(search_criterion, AIC="AIC", BIC="BIC", cv="cv_R2", cv_Huber="cv_Huber", cv_L1="cv_L1", cv_AUC="cv_AUC")
		if (is.null(criterion_column)) stop("Unknown search_criterion")
		# Without a baseline we rank on the new value itself, otherwise on the change from the baseline
		ranking = interactive[[paste0(criterion_column,"_new")]]
		if (!empty_start_dataset) ranking = ranking - interactive[[paste0(criterion_column,"_old")]]
		# R2 and AUC: biggest first; AIC, BIC, Huber and L1: smallest first
		neworder = order(ranking, decreasing=(search_criterion=="cv" || search_criterion=="cv_AUC"))
		interactive = interactive[neworder[1:interactive_n],]
		rownames(interactive)=1:interactive_n
		interactive = format(interactive,scientific=FALSE)
		print(interactive)
		input_user = readline(prompt=paste0("Enter the index of the element from ", names(latent_var_current)[search], " to be added: "))
		# Anything that is not one of the displayed indexes means "add nothing"
		if (sum(input_user == rownames(interactive))==0){
			say_none_added()
			return(NULL)
		}
		best_var = neworder[1:interactive_n][input_user == rownames(interactive)]
		if (print) cat(paste0("Adding element from ", names(latent_var_current)[search], ": ",colnames(latent_var_toadd[[search]])[best_var], " (Criterion before = ",round(criterion_before[best_var],5), "; after = ",round(criterion_after[best_var],5),")\n"))
		latent_var_current[[search]] = cbind(latent_var_current[[search]], latent_var_toadd[[search]][,best_var, drop=FALSE])
		latent_var_toadd[[search]] = latent_var_toadd[[search]][,-best_var, drop=FALSE]

	}
	# Updated model and coefficients
	start_latent_var[[search]] = c(start_latent_var[[search]],0)
	fit = IMLEGIT(data=data, latent_var=latent_var_current, formula=formula, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, print=FALSE)
	for (i in 1:k) start_latent_var[[i]] = stats::coef(fit$fit_latent_var[[i]])
	return(list(fit=fit, start_latent_var=start_latent_var, latent_var_current=latent_var_current,latent_var_toadd=latent_var_toadd))
}


backward_step = function(fit, data, formula, interactive_mode=FALSE, genes_current=NULL, env_current=NULL, genes_dropped=NULL, env_dropped=NULL, search="genes", search_criterion="AIC", p_threshold = .20, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_genes=NULL, start_env=NULL, eps=.01, maxiter=25, family=gaussian, seed=NULL, print=TRUE){
	# How much genes or env to add
	if (search=="genes") elements_N = NCOL(genes_current)
	if (search=="env") elements_N = NCOL(env_current)
	if (elements_N == 0){
		if (search=="genes" && print) cat("No gene removed\n")
		if (search=="env" && print) cat("No environment removed\n")
		return(NULL)
	}
	# Vector which says which variables are "good" (worth keeping)
	good = rep(FALSE, elements_N)
	# Vector which says how much the criterion changed from excluding the variable
	criterion_before = rep(NA, elements_N)
	criterion_after = rep(NA, elements_N)
	criterion_diff = rep(NA, elements_N)
	# In interactive model, we must keep track of every AIC, BIC, p-value, Cross-validated R2 and AUC to show the user at every iteration
	if (interactive_mode){
		if (search=="genes") interactive = data.frame(variable=colnames(genes_current), N_old=rep(NA, elements_N), N_new=rep(NA, elements_N), p_value=rep(NA, elements_N),AIC_old=rep(NA, elements_N),AIC_new=rep(NA, elements_N),BIC_old=rep(NA, elements_N),BIC_new=rep(NA, elements_N),cv_R2_old=rep(NA, elements_N),cv_R2_new=rep(NA, elements_N),cv_AUC_old=rep(NA, elements_N),cv_AUC_new=rep(NA, elements_N),cv_Huber_old=rep(NA, elements_N),cv_Huber_new=rep(NA, elements_N),cv_L1_old=rep(NA, elements_N),cv_L1_new=rep(NA, elements_N))
		if (search=="env") interactive = data.frame(variable=colnames(env_current), N_old=rep(NA, elements_N), N_new=rep(NA, elements_N), p_value=rep(NA, elements_N),AIC_old=rep(NA, elements_N),AIC_new=rep(NA, elements_N),BIC_old=rep(NA, elements_N),BIC_new=rep(NA, elements_N),cv_R2_old=rep(NA, elements_N),cv_R2_new=rep(NA, elements_N),cv_AUC_old=rep(NA, elements_N),cv_AUC_new=rep(NA, elements_N),cv_Huber_old=rep(NA, elements_N),cv_Huber_new=rep(NA, elements_N),cv_L1_old=rep(NA, elements_N),cv_L1_new=rep(NA, elements_N))
	}
	# we need to always use this sample as it could be different when removing a variable
	comp_with = stats::complete.cases(data,genes_current,env_current)
	fit_with = fit
	if (search=="genes") p_value = stats::coef(summary(fit_with)$fit_genes)[,4]
	if (search=="env") p_value = stats::coef(summary(fit_with)$fit_env)[,4]
	# Non-cross-validated models
	for (j in 1:elements_N){
		if (search=="genes"){
			comp_without = stats::complete.cases(data,genes_current[,-j,drop=FALSE],env_current)
			fit_without = LEGIT(data=data[comp_with,,drop=FALSE], genes=genes_current[comp_with,-j,drop=FALSE], env=env_current[comp_with,,drop=FALSE], formula=formula, start_genes=start_genes[-j], start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
		}
		if (search=="env"){
			comp_without = stats::complete.cases(data,genes_current,env_current[,-j,drop=FALSE])
			fit_without = LEGIT(data=data[comp_with,,drop=FALSE], genes=genes_current[comp_with,,drop=FALSE], env=env_current[comp_with,-j,drop=FALSE], formula=formula, start_genes=start_genes, start_env=start_env[-j], eps=eps, maxiter=maxiter, family=family, print=FALSE)
		}
		good[j] = p_value[j] <= p_threshold

		if (exclude_worse_AIC){
			if (fit_with$true_model_parameters$AIC <= fit_without$true_model_parameters$AIC) good[j]= good[j] || TRUE
		}
		if (search_criterion=="AIC"){
			criterion_before[j] = fit_with$true_model_parameters$AIC
			criterion_after[j] = fit_without$true_model_parameters$AIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		if (search_criterion=="BIC"){
			criterion_before[j] = fit_with$true_model_parameters$BIC
			criterion_after[j] = fit_without$true_model_parameters$BIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		# Keep p-value, AIC and BIC if in interactive model
		if (interactive_mode){
			interactive$N_old[j] = sum(comp_with)
			interactive$N_new[j] = sum(comp_without)

			interactive$p_value[j] = round(p_value[j],6)

			interactive$AIC_old[j] = fit_with$true_model_parameters$AIC
			interactive$AIC_new[j] = fit_without$true_model_parameters$AIC

			interactive$BIC_old[j] = fit_with$true_model_parameters$BIC
			interactive$BIC_new[j] = fit_without$true_model_parameters$BIC
		}
	}
	if (sum(!good)==0){
		if (search=="genes" && print) cat("No gene removed\n")
		if (search=="env" && print) cat("No environment removed\n")
		return(NULL)
	}
	# Only do this if NOT in interactive mode, otherwise at the end of the algorithm, we show the the data.frame interactive and let the user choose
	if (!interactive_mode){
		if (search_criterion=="AIC" || search_criterion=="BIC"){
			if (min(criterion_diff,na.rm=TRUE) > 0){
				if (search=="genes" && print) cat("No gene removed\n")
				if (search=="env" && print) cat("No environment removed\n")
				return(NULL)
			}
			worst_var = which.min(criterion_diff)
			if (search=="genes"){
				if (print) cat(paste0("Removing gene: ",colnames(genes_current)[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
				genes_dropped = cbind(genes_dropped, genes_current[,worst_var, drop=FALSE])
				genes_current = genes_current[,-worst_var, drop=FALSE]
			}
			if (search=="env"){
				if (print) cat(paste0("Removing environment: ",colnames(env_current)[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
				env_dropped = cbind(env_dropped, env_current[,worst_var, drop=FALSE])
				env_current = env_current[,-worst_var, drop=FALSE]
			}
		}
	}
	# Cross-validated models
	if (search_criterion == "cv" || search_criterion == "cv_AUC" || search_criterion =="cv_Huber" || search_criterion =="cv_L1"){
		# Not looking at variables that labelled as good
		elements_N_cv = sum(!good)
		# Vector which says how much the criterion changed from removing the variable
		criterion_before = rep(NA, elements_N)
		criterion_after = rep(NA, elements_N)
		criterion_diff = rep(NA, elements_N)
		# Set seed
		if (!is.null(seed)) current_seed = seed
		else current_seed = NULL
		fit_cv_with = LEGIT_cv(data=data, genes=genes_current, env=env_current, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
		for (j in 1:elements_N){
			# Only do this if not labelled as good
			if (!good[j]){
				if (search=="genes") comp_without = stats::complete.cases(data,genes_current[,-j,drop=FALSE],env_current)
				if (search=="env") comp_without = stats::complete.cases(data,genes_current,env_current[,-j,drop=FALSE])
				if (search=="genes") fit_cv_without = LEGIT_cv(data=data[comp_with,,drop=FALSE], genes=genes_current[comp_with,-j,drop=FALSE], env=env_current[comp_with,,drop=FALSE], formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes[-j], start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=current_seed)
				if (search=="env") fit_cv_without = LEGIT_cv(data=data[comp_with,,drop=FALSE], genes=genes_current[comp_with,,drop=FALSE], env=env_current[comp_with,-j,drop=FALSE], formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env[-j], eps=eps, maxiter=maxiter, family=family, seed=current_seed)
				if (search_criterion=="cv"){
					criterion_before[j] = mean(fit_cv_with$R2_cv)
					criterion_after[j] = mean(fit_cv_without$R2_cv)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				} 
				if (search_criterion=="cv_Huber"){
					criterion_before[j] = mean(fit_cv_with$Huber_cv)
					criterion_after[j] = mean(fit_cv_without$Huber_cv)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				if (search_criterion=="cv_L1"){
					criterion_before[j] = mean(fit_cv_with$L1_cv)
					criterion_after[j] = mean(fit_cv_without$L1_cv)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				if (search_criterion=="cv_AUC"){
					criterion_before[j] = mean(fit_cv_with$AUC)
					criterion_after[j] = mean(fit_cv_without$AUC)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				# Keep cross-validation R2 and AUC in interactive model
				if (interactive_mode){
					interactive$cv_R2_old[j] = mean(fit_cv_with$R2_cv)
					interactive$cv_R2_new[j] = mean(fit_cv_without$R2_cv)

					interactive$cv_Huber_old[j] = mean(fit_cv_with$Huber_cv)
					interactive$cv_Huber_new[j] = mean(fit_cv_without$Huber_cv)

					interactive$cv_L1_old[j] = mean(fit_cv_with$L1_cv)
					interactive$cv_L1_new[j] = mean(fit_cv_without$L1_cv)

					if(classification){
						interactive$cv_AUC_old[j] = mean(fit_cv_with$AUC)
						interactive$cv_AUC_new[j] = mean(fit_cv_without$AUC)
					}
				}
			}
		}
		# Only do this if NOT in interactive mode; otherwise, at the end of the algorithm, we show the data.frame "interactive" and let the user choose
		if (!interactive_mode){
			if ((max(criterion_diff,na.rm=TRUE) < 0 && !(search_criterion=="cv_Huber" || search_criterion=="cv_L1")) || (min(criterion_diff,na.rm=TRUE) > 0 && (search_criterion=="cv_Huber" || search_criterion=="cv_L1"))){
				if (search=="genes" && print) cat("No gene removed\n")
				if (search=="env" && print) cat("No environment removed\n")
				return(NULL)
			}
			if (search_criterion=="cv_Huber" || search_criterion=="cv_L1") worst_var = which.min(criterion_diff)
			else worst_var = which.max(criterion_diff)
			if (search=="genes"){
				if (print) cat(paste0("Removing gene: ",colnames(genes_current)[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
				genes_dropped = cbind(genes_dropped, genes_current[,worst_var, drop=FALSE])
				genes_current = genes_current[,-worst_var, drop=FALSE]
			}
			if (search=="env"){
				if (print) cat(paste0("Removing environment: ",colnames(env_current)[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
				env_dropped = cbind(env_dropped, env_current[,worst_var, drop=FALSE])
				env_current = env_current[,-worst_var, drop=FALSE]
			}
		}
	}
	if (interactive_mode){
		if (search_criterion=="cv") interactive_n = min(5,elements_N_cv)
		else interactive_n = min(5,elements_N)
		if (search_criterion=="AIC") neworder = order(interactive$AIC_new - interactive$AIC_old, decreasing=FALSE)
		if (search_criterion=="BIC") neworder = order(interactive$BIC_new - interactive$BIC_old, decreasing=FALSE)
		if (search_criterion=="cv") neworder = order(interactive$cv_R2_new - interactive$cv_R2_old, decreasing=TRUE)
		if (search_criterion=="cv_Huber") neworder = order(interactive$cv_Huber_new - interactive$cv_Huber_old, decreasing=FALSE)
		if (search_criterion=="cv_L1") neworder = order(interactive$cv_L1_new - interactive$cv_L1_old, decreasing=FALSE)
		if (search_criterion=="cv_AUC") neworder = order(interactive$cv_AUC_new - interactive$cv_AUC_old, decreasing=TRUE)
		interactive = interactive[neworder[1:interactive_n],]
		rownames(interactive)=1:interactive_n
		interactive = format(interactive,scientific=FALSE)
		print(interactive)
		if (search=="genes") input_user = readline(prompt="Enter the index of the gene to be removed: ")
		if (search=="env") input_user = readline(prompt="Enter the index of the environment to be removed: ")
		if (sum(input_user == rownames(interactive))==0){
			if (search=="genes" && print) cat("No gene removed\n")
			if (search=="env" && print) cat("No environment removed\n")
			return(NULL)
		}
		worst_var = neworder[1:interactive_n][input_user == rownames(interactive)]
		if (search=="genes"){
			if (print) cat(paste0("Removing gene: ",colnames(genes_current)[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
			genes_dropped = cbind(genes_dropped, genes_current[,worst_var, drop=FALSE])
			genes_current = genes_current[,-worst_var, drop=FALSE]
		}
		if (search=="env"){
			if (print) cat(paste0("Removing environment: ",colnames(env_current)[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
			env_dropped = cbind(env_dropped, env_current[,worst_var, drop=FALSE])
			env_current = env_current[,-worst_var, drop=FALSE]
		}
	}
	# Updated model and coefficients
	if (search=="genes") start_genes=start_genes[-worst_var]
	if (search=="env") start_env=start_env[-worst_var]
	fit = LEGIT(data=data, genes=genes_current, env=env_current, formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
	start_genes = stats::coef(fit$fit_genes)
	start_env = stats::coef(fit$fit_env)
	if (search=="genes") return(list(fit=fit, start_genes=start_genes,start_env=start_env,genes_current=genes_current,genes_dropped=genes_dropped))
	if (search=="env") return(list(fit=fit, start_genes=start_genes,start_env=start_env,env_current=env_current,env_dropped=env_dropped))
}

# Performs a single backward (removal) step of a stepwise search on one latent variable of an IMLEGIT model.
#
# Every element (column) of latent_var_current[[search]] is tentatively removed and the model is refitted
# without it. An element is flagged as "good" (worth keeping) when its p-value in the current fit is
# <= p_threshold or, if exclude_worse_AIC=TRUE, when removing it does not lower the AIC. If every element
# is good, nothing is removed. Otherwise the element to remove is chosen by search_criterion
# (or by the user when interactive_mode=TRUE) and the model is refitted without it.
#
# Arguments:
#   fit                Current fitted model; its true_model_parameters$AIC / $BIC and summary() are read.
#   data               Dataset passed to IMLEGIT / IMLEGIT_cv.
#   formula            Model formula passed to IMLEGIT / IMLEGIT_cv.
#   interactive_mode   If TRUE, a table of candidates is printed and the user picks the element to remove.
#   latent_var_current List with one entry per latent variable holding the elements currently in the model.
#   latent_var_dropped List with one entry per latent variable accumulating the elements removed so far.
#   search             Numeric index of the latent variable from which an element may be removed.
#   search_criterion   One of "AIC", "BIC", "cv", "cv_Huber", "cv_L1", "cv_AUC".
#   p_threshold        Elements with a p-value <= p_threshold are never removed.
#   exclude_worse_AIC  If TRUE, elements whose removal does not lower the AIC are never removed.
#   max_steps          Not used by this function.
#   cv_iter, cv_folds, folds, Huber_p, classification, seed
#                      Passed as-is to IMLEGIT_cv (only used with the cross-validated criteria).
#   start_latent_var   List of starting coefficients, one vector per latent variable.
#                      NOTE(review): the default is self-referential, so callers must always supply it.
#   eps, maxiter, family
#                      Passed as-is to IMLEGIT / IMLEGIT_cv.
#   print              If TRUE, progress messages are written with cat().
#
# Returns NULL when no element is removed; otherwise a list with the refitted model (fit), the updated
# starting coefficients (start_latent_var) and the updated latent_var_current and latent_var_dropped.
backward_step_IM = function(fit, data, formula, interactive_mode=FALSE, latent_var_current=NULL, latent_var_dropped=NULL, search=NULL, search_criterion="AIC", p_threshold = .20, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_latent_var=start_latent_var, eps=.01, maxiter=25, family=gaussian, seed=NULL, print=TRUE){
	k = length(latent_var_current)
	# Number of elements that could be removed from the searched latent variable
	elements_N = NCOL(latent_var_current[[search]])
	if (elements_N == 0){
		if (print) cat(paste0("No element from ", names(latent_var_current)[search]," was removed\n"))
		return(NULL)
	}
	# TRUE for any of the cross-validated criteria
	is_cv = search_criterion == "cv" || search_criterion == "cv_AUC" || search_criterion =="cv_Huber" || search_criterion =="cv_L1"
	# Huber and L1 are errors (smaller is better), while the cross-validated R2 and AUC are scores (bigger is better)
	lower_is_better = search_criterion=="cv_Huber" || search_criterion=="cv_L1"
	# Vector which says which variables are "good" (worth keeping)
	good = rep(FALSE, elements_N)
	# Vectors which say how much the criterion changed from excluding the variable
	criterion_before = rep(NA, elements_N)
	criterion_after = rep(NA, elements_N)
	criterion_diff = rep(NA, elements_N)
	# In interactive mode, we must keep track of every AIC, BIC, p-value, cross-validated R2 and AUC to show the user at every iteration
	if (interactive_mode) interactive = data.frame(variable=colnames(latent_var_current[[search]]), N_old=rep(NA, elements_N), N_new=rep(NA, elements_N), p_value=rep(NA, elements_N),AIC_old=rep(NA, elements_N),AIC_new=rep(NA, elements_N),BIC_old=rep(NA, elements_N),BIC_new=rep(NA, elements_N),cv_R2_old=rep(NA, elements_N),cv_R2_new=rep(NA, elements_N),cv_AUC_old=rep(NA, elements_N),cv_AUC_new=rep(NA, elements_N),cv_Huber_old=rep(NA, elements_N),cv_Huber_new=rep(NA, elements_N),cv_L1_old=rep(NA, elements_N),cv_L1_new=rep(NA, elements_N))
	# Complete cases of the current model. Every reduced model is fitted on this same sample,
	# as the set of complete cases could otherwise change when removing a variable
	comp_with = stats::complete.cases(data,latent_var_current[[1]])
	if (k > 1) for (i in 2:k){
		comp_with = comp_with & stats::complete.cases(latent_var_current[[i]])
	}
	# Latent variables and starting coefficients with element j of the searched latent variable removed (rows restricted to comp_with)
	without_element = function(j){
		reduced_latent_var = latent_var_current
		for (i in seq_len(k)){
			if (i == search) reduced_latent_var[[i]] = latent_var_current[[i]][comp_with,-j, drop=FALSE]
			else reduced_latent_var[[i]] = latent_var_current[[i]][comp_with,, drop=FALSE]
		}
		reduced_start = start_latent_var
		reduced_start[[search]] = start_latent_var[[search]][-j]
		list(latent_var=reduced_latent_var, start_latent_var=reduced_start)
	}
	fit_with = fit
	# p-values (4th column of the coefficient table) of the elements of the searched latent variable
	p_value = stats::coef(summary(fit_with)[[search+1]])[,4]
	# Non-cross-validated models
	for (j in seq_len(elements_N)){
		# Complete cases without the variable (only reported to the user as N_new in interactive mode)
		if (search == 1) comp_without = stats::complete.cases(data,latent_var_current[[1]][,-j,drop=FALSE])
		else comp_without = stats::complete.cases(data,latent_var_current[[1]])
		if (k > 1) for (i in 2:k){
			if (search == i) comp_without = comp_without & stats::complete.cases(latent_var_current[[i]][,-j,drop=FALSE])
			else comp_without = comp_without & stats::complete.cases(latent_var_current[[i]])
		}
		reduced = without_element(j)
		fit_without = IMLEGIT(data=data[comp_with,,drop=FALSE], latent_var=reduced[["latent_var"]], formula=formula, start_latent_var=reduced[["start_latent_var"]], eps=eps, maxiter=maxiter, family=family, print=FALSE)
		# Significant elements are kept
		good[j] = p_value[j] <= p_threshold
		# Elements whose removal does not improve the AIC are kept
		if (exclude_worse_AIC){
			if (fit_with$true_model_parameters$AIC <= fit_without$true_model_parameters$AIC) good[j] = TRUE
		}
		if (search_criterion=="AIC"){
			criterion_before[j] = fit_with$true_model_parameters$AIC
			criterion_after[j] = fit_without$true_model_parameters$AIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		if (search_criterion=="BIC"){
			criterion_before[j] = fit_with$true_model_parameters$BIC
			criterion_after[j] = fit_without$true_model_parameters$BIC
			criterion_diff[j] = criterion_after[j] - criterion_before[j]
		}
		# Keep p-value, AIC and BIC if in interactive mode
		if (interactive_mode){
			interactive$N_old[j] = sum(comp_with)
			interactive$N_new[j] = sum(comp_without)

			interactive$p_value[j] = round(p_value[j],6)

			interactive$AIC_old[j] = fit_with$true_model_parameters$AIC
			interactive$AIC_new[j] = fit_without$true_model_parameters$AIC

			interactive$BIC_old[j] = fit_with$true_model_parameters$BIC
			interactive$BIC_new[j] = fit_without$true_model_parameters$BIC
		}
	}
	# Every element is worth keeping
	if (sum(!good)==0){
		if (print) cat(paste0("No element from ", names(latent_var_current)[search]," was removed\n"))
		return(NULL)
	}
	# AIC/BIC: only choose automatically if NOT in interactive mode, otherwise the user chooses at the end
	if (!interactive_mode && (search_criterion=="AIC" || search_criterion=="BIC")){
		# Removing any element makes the criterion worse
		if (min(criterion_diff,na.rm=TRUE) > 0){
			if (print) cat(paste0("No element from ", names(latent_var_current)[search]," was removed\n"))
			return(NULL)
		}
		worst_var = which.min(criterion_diff)
	}
	# Cross-validated models
	if (is_cv){
		# Not looking at variables that are labelled as good
		elements_N_cv = sum(!good)
		# Vectors which say how much the criterion changed from removing the variable
		criterion_before = rep(NA, elements_N)
		criterion_after = rep(NA, elements_N)
		criterion_diff = rep(NA, elements_N)
		# The same seed is passed to every cross-validation
		fit_cv_with = IMLEGIT_cv(data=data, latent_var=latent_var_current, formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, seed=seed)
		for (j in seq_len(elements_N)){
			# Only do this if not labelled as good
			if (!good[j]){
				reduced = without_element(j)
				fit_cv_without = IMLEGIT_cv(data=data[comp_with,,drop=FALSE], latent_var=reduced[["latent_var"]], formula=formula, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=reduced[["start_latent_var"]], eps=eps, maxiter=maxiter, family=family, seed=seed)
				if (search_criterion=="cv"){
					criterion_before[j] = mean(fit_cv_with$R2_cv)
					criterion_after[j] = mean(fit_cv_without$R2_cv)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				if (search_criterion=="cv_Huber"){
					criterion_before[j] = mean(fit_cv_with$Huber_cv)
					criterion_after[j] = mean(fit_cv_without$Huber_cv)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				if (search_criterion=="cv_L1"){
					criterion_before[j] = mean(fit_cv_with$L1_cv)
					criterion_after[j] = mean(fit_cv_without$L1_cv)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				if (search_criterion=="cv_AUC"){
					criterion_before[j] = mean(fit_cv_with$AUC)
					criterion_after[j] = mean(fit_cv_without$AUC)
					criterion_diff[j] = criterion_after[j] - criterion_before[j]
				}
				# Keep cross-validated R2, Huber error, L1 error and AUC in interactive mode
				if (interactive_mode){
					interactive$cv_R2_old[j] = mean(fit_cv_with$R2_cv)
					interactive$cv_R2_new[j] = mean(fit_cv_without$R2_cv)

					interactive$cv_Huber_old[j] = mean(fit_cv_with$Huber_cv)
					interactive$cv_Huber_new[j] = mean(fit_cv_without$Huber_cv)

					interactive$cv_L1_old[j] = mean(fit_cv_with$L1_cv)
					interactive$cv_L1_new[j] = mean(fit_cv_without$L1_cv)

					if(classification){
						interactive$cv_AUC_old[j] = mean(fit_cv_with$AUC)
						interactive$cv_AUC_new[j] = mean(fit_cv_without$AUC)
					}
				}
			}
		}
		# Only choose automatically if NOT in interactive mode, otherwise the user chooses at the end
		if (!interactive_mode){
			# Removing any element makes the criterion worse (lower score, or higher error)
			if ((max(criterion_diff,na.rm=TRUE) < 0 && !lower_is_better) || (min(criterion_diff,na.rm=TRUE) > 0 && lower_is_better)){
				if (print) cat(paste0("No element from ", names(latent_var_current)[search]," was removed\n"))
				return(NULL)
			}
			if (lower_is_better) worst_var = which.min(criterion_diff)
			else worst_var = which.max(criterion_diff)
		}
	}
	if (interactive_mode){
		# Show at most 5 candidates. With a cross-validated criterion, only the elements not labelled as good
		# were cross-validated (the others have NA in the cv columns), so only those can be offered to the user
		if (is_cv) interactive_n = min(5,elements_N_cv)
		else interactive_n = min(5,elements_N)
		# Best candidates for removal first (order() puts the NA at the end)
		if (search_criterion=="AIC") neworder = order(interactive$AIC_new - interactive$AIC_old, decreasing=FALSE)
		if (search_criterion=="BIC") neworder = order(interactive$BIC_new - interactive$BIC_old, decreasing=FALSE)
		if (search_criterion=="cv") neworder = order(interactive$cv_R2_new - interactive$cv_R2_old, decreasing=TRUE)
		if (search_criterion=="cv_Huber") neworder = order(interactive$cv_Huber_new - interactive$cv_Huber_old, decreasing=FALSE)
		if (search_criterion=="cv_L1") neworder = order(interactive$cv_L1_new - interactive$cv_L1_old, decreasing=FALSE)
		if (search_criterion=="cv_AUC") neworder = order(interactive$cv_AUC_new - interactive$cv_AUC_old, decreasing=TRUE)
		interactive = interactive[neworder[1:interactive_n],]
		rownames(interactive)=1:interactive_n
		interactive = format(interactive,scientific=FALSE)
		print(interactive)
		input_user = readline(prompt=paste0("Enter the index of the element from ", names(latent_var_current)[search], " to be removed: "))
		# Anything other than one of the displayed indexes means that nothing is removed
		if (sum(input_user == rownames(interactive))==0){
			if (print) cat(paste0("No element from ", names(latent_var_current)[search]," was removed\n"))
			return(NULL)
		}
		# Convert the displayed index back to the column index of the element
		worst_var = neworder[1:interactive_n][input_user == rownames(interactive)]
	}
	# Move the chosen element from the current set to the dropped set
	if (print) cat(paste0("Removing element from ", names(latent_var_current)[search], ": ",colnames(latent_var_current[[search]])[worst_var], " (Criterion before = ",round(criterion_before[worst_var],5), "; after = ",round(criterion_after[worst_var],5),")\n"))
	latent_var_dropped[[search]] = cbind(latent_var_dropped[[search]], latent_var_current[[search]][,worst_var, drop=FALSE])
	latent_var_current[[search]] = latent_var_current[[search]][,-worst_var, drop=FALSE]
	# Updated model and coefficients
	start_latent_var[[search]] = start_latent_var[[search]][-worst_var]
	fit = IMLEGIT(data=data, latent_var=latent_var_current, formula=formula, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, print=FALSE)
	for (i in seq_len(k)) start_latent_var[[i]] = stats::coef(fit$fit_latent_var[[i]])
	return(list(fit=fit, start_latent_var=start_latent_var,latent_var_current=latent_var_current,latent_var_dropped=latent_var_dropped))
}


stepwise_search = function(data, formula, interactive_mode=FALSE, genes_original=NULL, env_original=NULL, genes_extra=NULL, env_extra=NULL, search_type="bidirectional-forward", search="both", search_criterion="AIC", forward_exclude_p_bigger = .20, backward_exclude_p_smaller = .01, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_genes=NULL, start_env=NULL, eps=.01, maxiter=25, family=gaussian, seed=NULL, print=TRUE, remove_miss=FALSE){
	if (forward_exclude_p_bigger > 1 || forward_exclude_p_bigger <= 0) stop("forward_exclude_p_bigger must be between 0 and 1 (Set to 1 to ignore p-values in forward step)")
	if (backward_exclude_p_smaller >= 1 || backward_exclude_p_smaller < 0) stop("backward_exclude_p_smaller must be between 0 and 1 (Set to 0 to ignore p-values in backward step)")
	if (search_criterion=="AIC") string_choice="lowest AIC"
	else if (search_criterion=="BIC") string_choice="lowest BIC"
	else if (search_criterion=="cv") string_choice="lowest cross-validation error"
	else if (search_criterion=="cv_Huber") string_choice="lowest cross-validation Huber error"
	else if (search_criterion=="cv_L1") string_choice="lowest cross-validation L1-norm error"
	else if (search_criterion=="cv_AUC") string_choice="biggest cross-validated area under the curve"
	else stop("Not a valid search_criterion, use: AIC, BIC, cv or cv_AUC")

	# Retaining only the needed variables from the dataset (need to set G and E variables for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	data$G=0
	data$E=0
	data = stats::model.frame(formula, data=data, na.action=na.pass)

	comp = stats::complete.cases(data, genes_original, env_original, genes_extra, env_extra)
	if (remove_miss){
		data = data[comp,, drop=FALSE]
		if (!is.null(genes_original)) genes_original = genes_original[comp,, drop=FALSE]
		if (!is.null(genes_extra)) genes_extra = genes_extra[comp,, drop=FALSE]
		if (!is.null(env_original)) env_original = env_original[comp,, drop=FALSE]
		if (!is.null(env_extra)) env_extra = env_extra[comp,, drop=FALSE]
		if (dim(data)[1] <= 0) stop("no valid observation without missing values")
	}
	else if (sum(comp)!=length(comp) && print) cat("Note: Missing data was found. This will increase computing time in forward steps \n and could lead to an incorrect subset of variable if the sample size change too much. \n")

	if (interactive_mode && print) cat("<<~ Interative mode enabled ~>>\n")

	# If true, then we start with no genes or env and must find the best one first
	if ((is.null(genes_original) && search=="genes") || (is.null(env_original) && search=="env")) empty_start_dataset = TRUE
	else empty_start_dataset = FALSE
	if ((is.null(genes_original) || is.null(env_original)) && search=="both") stop("To do a stepwise search on both G and E, you must start with at least a single variable in G and in E. Please set genes_original and env_original.")

	# Setting up initial weighted scores
	if (is.null(genes_original) && search=="genes") start_genes = c()
	else if (is.null(start_genes)) start_genes = rep(1/dim(genes_original)[2],dim(genes_original)[2])

	if (is.null(env_original) && search=="env") start_env = c()
	else if (is.null(start_env)) start_env = rep(1/dim(env_original)[2],dim(env_original)[2])

	if (is.null(start_genes) && search=="both") start_genes = rep(1/dim(genes_original)[2],dim(genes_original)[2])
	if (is.null(start_env) && search=="both") start_env = rep(1/dim(env_original)[2],dim(env_original)[2])

	fit = NULL

	if (search_type == "forward"){
		if (print){
			if (forward_exclude_p_bigger < 1 && (exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Keeping only variables with p-values smaller than ", forward_exclude_p_bigger, " and which inclusion decrease the AIC\n"))
			else if (forward_exclude_p_bigger < 1 && !(exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Keeping only variables with p-values smaller than ", forward_exclude_p_bigger,"\n"))
			else if (exclude_worse_AIC || search_criterion=="AIC") cat(paste0("Keeping only variables which inclusion decrease the AIC\n"))
			if ((search_criterion=="cv" || search_criterion=="cv_AUC" || search_criterion=="cv_Huber" || search_criterion=="cv_L1") && (!exclude_worse_AIC || forward_exclude_p_bigger > .2)) cat("Note : We recommend using exclude_worse_AIC=TRUE and forward_exclude_p_bigger <= .20 to reduce the amount of cross-validations needed and to make the algorithm much faster.\n")
			if (search_criterion=="cv_AUC") classification=TRUE
			if (search=="genes") cat(paste0("Forward search of the genes to find the model with the ", string_choice,"\n"))
			if (search=="env") cat(paste0("Forward search of the environments to find the model with the ", string_choice,"\n"))
			if (search=="both") cat(paste0("Forward search of the genes and environments to find the model with the ", string_choice,"\n"))
		}
		genes_current = genes_original
		env_current = env_original
		if (!empty_start_dataset){
			# Original model
			fit = LEGIT(data=data, genes=genes_current, env=env_current, formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
			start_genes = stats::coef(fit$fit_genes)
			start_env = stats::coef(fit$fit_env)
		}
		else{
			# If dataset is NULL, then make the current dataset be an empty data frame of same size as the dataset with extra variables 
			if (is.null(genes_original)) genes_current = genes_extra[,-c(1:NCOL(genes_extra))]
			if (is.null(env_original)) env_current = env_extra[,-c(1:NCOL(env_extra))]
		}
		genes_toadd = NULL
		env_toadd = NULL
		if (search=="genes" || search=="both") genes_toadd = genes_extra
		if (search=="env" || search=="both") env_toadd = env_extra
		if (print) cat("\n")

		if (search=="both"){
			G_conv = FALSE
			E_conv = FALSE
			# Starts looking at genes when search="both"
			search_current = "genes"
		}
		else search_current = search
		# Overall convergence (equal to G_conv && E_conv if search=both)
		conv = FALSE

		for (i in 1:max_steps){
			if (print) cat(paste0("[Iteration: ",i,"]\n"))
			results = forward_step(empty_start_dataset=empty_start_dataset, fit=fit, data=data, formula=formula, interactive_mode=interactive_mode, genes_current=genes_current, env_current=env_current, genes_toadd=genes_toadd, env_toadd=env_toadd, search=search_current, search_criterion=search_criterion, p_threshold = forward_exclude_p_bigger, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=seed, print=print)
			if (is.null(results)){
				if (search=="both"){
					if (search_current == "genes"){
						search_current = "env"
						G_conv = TRUE
						if (E_conv) conv = TRUE
					}
					else{
						search_current = "genes"
						E_conv = TRUE
						if (G_conv) conv = TRUE
					}
				}
				else conv = TRUE
				if (conv) break
			}
			else{
				if (search_current == "genes") E_conv = FALSE
				if (search_current == "env") G_conv = FALSE
				# Resetting parameters based on iteration results
				empty_start_dataset = FALSE
				fit = results$fit
				start_genes=results$start_genes
				start_env=results$start_env
				if (search_current=="genes"){
					genes_current=results$genes_current
					genes_toadd=results$genes_toadd
				}
				if (search_current=="env"){
					env_current=results$env_current
					env_toadd=results$env_toadd
				}
			}
		}
	}
	if (search_type == "backward"){
		if (print){
			if (backward_exclude_p_smaller < 1 && (exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Dropping only variables with p-values bigger than ", backward_exclude_p_smaller, " and which removal decrease the AIC\n"))
			else if (backward_exclude_p_smaller < 1 && !(exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Dropping only variables with p-values bigger than ", backward_exclude_p_smaller,"\n"))
			else if (exclude_worse_AIC || search_criterion=="AIC") cat(paste0("Dropping only variables which removal decrease the AIC\n"))
			if ((search_criterion=="cv" || search_criterion=="cv_AUC" || search_criterion=="cv_Huber" || search_criterion=="cv_L1") && (!exclude_worse_AIC || backward_exclude_p_smaller < .01)) cat("Note : We recommend using exclude_worse_AIC=TRUE and backward_exclude_p_smaller >= .01 to reduce the amount of cross-validations needed and to make the algorithm much faster.\n")
			if (search_criterion=="cv_AUC") classification=TRUE
			if (search=="genes") cat(paste0("Backward search of the genes to find the model with the ", string_choice,"\n"))
			if (search=="env") cat(paste0("Backward search of the environments to find the model with the ", string_choice,"\n"))
			if (search=="both") cat(paste0("Backward search of the genes and environments to find the model with the ", string_choice,"\n"))
		}
		genes_current = genes_original
		env_current = env_original
		# Original model
		fit = LEGIT(data=data, genes=genes_current, env=env_current, formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
		start_genes = stats::coef(fit$fit_genes)
		start_env = stats::coef(fit$fit_env)
		# Creating empty data frames of same size as the other data frames
		genes_dropped = genes_original[,-c(1:NCOL(genes_original))]
		env_dropped = env_original[,-c(1:NCOL(env_original))]
		if (print) cat("\n")

		if (search=="both"){
			G_conv = FALSE
			E_conv = FALSE
			# Starts looking at genes when search="both"
			search_current = "genes"
		}
		else search_current = search
		# Overall convergence (equal to G_conv && E_conv if search=both)
		conv = FALSE

		for (i in 1:max_steps){
			if (print) cat(paste0("[Iteration: ",i,"]\n"))
			results = backward_step(fit=fit, data=data, formula=formula, interactive_mode=interactive_mode, genes_current=genes_current, genes_dropped=genes_dropped, env_current=env_current, env_dropped=env_dropped, search=search_current, search_criterion=search_criterion, p_threshold = backward_exclude_p_smaller, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=seed, print=print)
			if (is.null(results)){
				if (search=="both"){
					if (search_current == "genes"){
						search_current = "env"
						G_conv = TRUE
						if (E_conv) conv = TRUE
					}
					else{
						search_current = "genes"
						E_conv = TRUE
						if (G_conv) conv = TRUE
					}
				}
				else conv = TRUE
				if (conv) break
			}
			else{
				if (search_current == "genes") E_conv = FALSE
				if (search_current == "env") G_conv = FALSE
				# Resetting parameters based on iteration results
				fit = results$fit
				start_genes=results$start_genes
				start_env=results$start_env
				if (search_current=="genes"){
					genes_current=results$genes_current
					# Only used in bidirectionnal
					genes_dropped=results$genes_dropped
					if (NCOL(genes_current)==1){
						if(search=="both"){
							search_current = "env"
							G_conv = TRUE
							if (E_conv) conv = TRUE
						}
						else conv = TRUE
						if (conv) break
					}
				}
				if (search_current=="env"){
					env_current=results$env_current
					# Only used in bidirectionnal
					env_dropped=results$env_dropped
					if (NCOL(env_current)==1){
						if(search=="both"){
							search_current = "genes"
							E_conv = TRUE
							if (G_conv) conv = TRUE
						}
						else conv = TRUE
						if (conv) break
					}
				}
			}
		}
	}
	if (search_type == "bidirectional-forward" || search_type == "bidirectional-backward"){
		if (print){
			if (forward_exclude_p_bigger < 1 && (exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Keeping only variables with p-values smaller than ", forward_exclude_p_bigger, " and which inclusion decrease the AIC\n"))
			else if (forward_exclude_p_bigger < 1 && !(exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Keeping only variables with p-values smaller than ", forward_exclude_p_bigger,"\n"))
			else if (exclude_worse_AIC || search_criterion=="AIC") cat(paste0("Keeping only variables which inclusion decrease the AIC\n"))
			if (backward_exclude_p_smaller < 1 && (exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Dropping only variables with p-values bigger than ", backward_exclude_p_smaller, " and which removal decrease the AIC\n"))
			else if (backward_exclude_p_smaller < 1 && !(exclude_worse_AIC || search_criterion=="AIC")) cat(paste0("Dropping only variables with p-values bigger than ", backward_exclude_p_smaller,"\n"))
			else if (exclude_worse_AIC || search_criterion=="AIC") cat(paste0("Dropping only variables which removal decrease the AIC\n"))
			if ((search_criterion=="cv" || search_criterion=="cv_AUC" || search_criterion=="cv_Huber" || search_criterion=="cv_L1") && (!exclude_worse_AIC || forward_exclude_p_bigger > .2)) cat("Note : We recommend using exclude_worse_AIC=TRUE and forward_exclude_p_bigger <= .20 to reduce the amount of cross-validations needed and to make the algorithm much faster.\n")
			if (search_criterion=="cv_AUC") classification=TRUE
			if (search=="genes") cat(paste0("Bidirectional search of the genes to find the model with the ", string_choice,"\n"))
			if (search=="env") cat(paste0("Bidirectional search of the environments to find the model with the ", string_choice,"\n"))
			if (search=="both") cat(paste0("Bidirectional search of the genes and environments to find the model with the ", string_choice,"\n"))
		}
		if (search_type == "bidirectional-forward"){
			genes_current = genes_original
			env_current = env_original
			if (!empty_start_dataset){
				# Original model
				fit = LEGIT(data=data, genes=genes_current, env=env_current, formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
				start_genes = stats::coef(fit$fit_genes)
				start_env = stats::coef(fit$fit_env)
			}
			else{
				# If dataset is NULL, then make the current dataset be an empty data frame of same size as the dataset with extra variables 
				if (is.null(genes_original)) genes_current = genes_extra[,-c(1:NCOL(genes_extra))]
				if (is.null(env_original)) env_current = env_extra[,-c(1:NCOL(env_extra))]
			}
			genes_toadd_ordrop = NULL
			env_toadd_ordrop = NULL
			if (search=="genes" || search=="both") genes_toadd_ordrop = genes_extra
			if (search=="env" || search=="both") env_toadd_ordrop = env_extra
			direction="forward"
			forward_failed=FALSE
			# Count as failed because we can't backward if forward doesn't work
			backward_failed=TRUE
		}
		if (search_type == "bidirectional-backward"){
			genes_current = genes_original
			env_current = env_original
			# Original model
			fit = LEGIT(data=data, genes=genes_current, env=env_current, formula=formula, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, print=FALSE)
			start_genes = stats::coef(fit$fit_genes)
			start_env = stats::coef(fit$fit_env)
			# Creating empty data frames of same size as the other data frames
			genes_toadd_ordrop = genes_original[,-c(1:NCOL(genes_original))]
			env_toadd_ordrop = env_original[,-c(1:NCOL(env_original))]
			direction="backward"
			# Count as failed because we can't backward if forward doesn't work
			forward_failed=TRUE
			backward_failed=FALSE
		}
		if (print) cat("\n")

		if (search=="both"){
			G_conv = FALSE
			E_conv = FALSE
			# Starts looking at genes when search="both"
			search_current = "genes"
		}
		else search_current = search
		# Overall convergence (equal to G_conv && E_conv if search=both)
		conv = FALSE

		for (i in 1:max_steps){
			if (print) cat(paste0("[Iteration: ",i,"]\n"))
			if (direction=="forward"){
				results = forward_step(empty_start_dataset=empty_start_dataset, fit=fit, data=data, formula=formula, interactive_mode=interactive_mode, genes_current=genes_current, env_current=env_current, genes_toadd=genes_toadd_ordrop, env_toadd=env_toadd_ordrop, search=search_current, search_criterion=search_criterion, p_threshold = forward_exclude_p_bigger, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=seed, print=print)
				if (is.null(results)) forward_failed=TRUE
				else{
					if (search_current == "genes") E_conv = FALSE
					if (search_current == "env") G_conv = FALSE
					forward_failed=FALSE
					# Resetting parameters based on iteration results
					empty_start_dataset = FALSE
					fit = results$fit
					start_genes=results$start_genes
					start_env=results$start_env
					if (search_current=="genes"){
						genes_current=results$genes_current
						genes_toadd_ordrop=results$genes_toadd
						if (NCOL(genes_toadd_ordrop)==0){
							# Count as a fail
							forward_failed=TRUE
						}
					}
					if (search_current=="env"){
						env_current=results$env_current
						env_toadd_ordrop=results$env_toadd
						if (NCOL(env_toadd_ordrop)==0){
							# Count as a fail
							forward_failed=TRUE
						}
					}
				}
				direction="backward"
			}
			if (forward_failed && backward_failed){
				if (search=="both"){
					backward_failed = FALSE
					forward_failed = FALSE
					if (search_current == "genes"){
						search_current = "env"
						# Count as a fail
						if (NCOL(env_current)==1) backward_failed=TRUE
						if (NCOL(env_toadd_ordrop)==0) forward_failed=TRUE
						G_conv = TRUE
						if (E_conv) conv = TRUE
					}
					else{
						search_current = "genes"
						# Count as a fail
						if (NCOL(genes_current)==1) backward_failed=TRUE
						if (NCOL(genes_toadd_ordrop)==0) forward_failed=TRUE
						E_conv = TRUE
						if (G_conv) conv = TRUE
					}
					if (search_type == "bidirectional-forward") direction="forward"
					if (search_type == "bidirectional-backward") direction="backward"
				}
				else conv = TRUE
				if (conv) break
			}
			if (direction=="backward"){
				# Can't backward if only one remaining variable
				if (!((NCOL(genes_current)<=1 && search_current=="genes") || (NCOL(env_current)<=1 && search_current=="env"))){
					results = backward_step(fit=fit, data=data, formula=formula, interactive_mode=interactive_mode, genes_current=genes_current, genes_dropped=genes_toadd_ordrop, env_current=env_current, env_dropped=env_toadd_ordrop, search=search_current, search_criterion=search_criterion, p_threshold = backward_exclude_p_smaller, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_genes=start_genes, start_env=start_env, eps=eps, maxiter=maxiter, family=family, seed=seed, print=print)
					if (is.null(results)) backward_failed=TRUE
					else{
						if (search_current == "genes") E_conv = FALSE
						if (search_current == "env") G_conv = FALSE
						backward_failed=FALSE
						# Resetting parameters based on iteration results
						fit = results$fit
						start_genes=results$start_genes
						start_env=results$start_env
						one_remain=FALSE
						if (search_current=="genes"){
							genes_current=results$genes_current
							if (NCOL(genes_current)==1){
								# Count as a fail
								backward_failed=TRUE
							}
							genes_toadd_ordrop=results$genes_dropped
						}
						if (search_current=="env"){
							env_current=results$env_current
							if (NCOL(env_current)==1){
								# Count as a fail
								backward_failed=TRUE
							}
							env_toadd_ordrop=results$env_dropped
						}
					}
				}
				else{
					backward_failed=TRUE
					if (search_current=="genes" && print) cat("No gene removed\n")
					if (search_current=="env" && print) cat("No environment removed\n")
				}
				direction="forward"
			}
			if (forward_failed && backward_failed){
				if (search=="both"){
					backward_failed = FALSE
					forward_failed = FALSE
					if (search_current == "genes"){
						search_current = "env"
						# Count as a fail
						if (NCOL(env_current)==1) backward_failed=TRUE
						if (NCOL(env_toadd_ordrop)==0) forward_failed=TRUE
						G_conv = TRUE
						if (E_conv) conv = TRUE
					}
					else{
						search_current = "genes"
						# Count as a fail
						if (NCOL(genes_current)==1) backward_failed=TRUE
						if (NCOL(genes_toadd_ordrop)==0) forward_failed=TRUE
						E_conv = TRUE
						if (G_conv) conv = TRUE
					}
					if (search_type == "bidirectional-forward") direction="forward"
					if (search_type == "bidirectional-backward") direction="backward"
				}
				else conv = TRUE
				if (conv) break
			}
		}
	}
	if (i >= max_steps) warning("Stepwise search did not reach convergence in max_steps steps. Try increasing max_steps.")
	return(fit)
}


# Stepwise search of the elements of the latent variables of an IMLEGIT model.
#
# Depending on `search_type` ("forward", "backward", "bidirectional-forward" or
# "bidirectional-backward"), elements of `latent_var_extra` are added to and/or
# elements of `latent_var_original` are dropped from the model, one element per
# step, by calling forward_step_IM()/backward_step_IM() until neither changes
# the model anymore or `max_steps` steps were done.
#
# Arguments (only what this function itself does with them is described; the
# rest is forwarded as-is to IMLEGIT(), forward_step_IM() and backward_step_IM()):
#   data, formula         Dataset and model formula. `data` is reduced to the
#                         variables of `formula` with stats::model.frame().
#   latent_var_original   List of k datasets, one per latent variable, with the
#                         elements the search starts from. Cannot be NULL;
#                         element `search` may be NULL/empty for forward searches.
#   latent_var_extra      List of k datasets with the candidate elements to add
#                         (NULL means no candidates).
#   search                0 to cycle through all latent variables, or the index
#                         of the single latent variable to search.
#   search_criterion      One of "AIC", "BIC", "cv", "cv_Huber", "cv_L1", "cv_AUC".
#                         "cv_AUC" forces classification=TRUE.
#   forward_exclude_p_bigger, backward_exclude_p_smaller
#                         p-value thresholds handed to the forward/backward steps
#                         (1, respectively 0, means "ignore p-values").
#   max_steps             Maximum number of iterations of the search loop.
#   print                 If TRUE, progress messages are written with cat().
#   remove_miss           If TRUE, rows with missing values in `data` or in any
#                         latent variable dataset are removed beforehand.
#
# Returns the fit of the last accepted model: the object returned by IMLEGIT()
# or the `fit` element returned by a step function. It is NULL if no model was
# ever fitted (empty starting latent variable and no element could be added, or
# `search_type` matching none of the four supported values). A warning is
# raised when the search loop ran out of steps before converging.
stepwise_search_IM = function(data, formula, interactive_mode=FALSE, latent_var_original=NULL, latent_var_extra=NULL, search_type="bidirectional-forward", search=0, search_criterion="AIC", forward_exclude_p_bigger = .20, backward_exclude_p_smaller = .01, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_latent_var=NULL, eps=.01, maxiter=25, family=gaussian, seed=NULL, print=TRUE, remove_miss=FALSE){
	k = max(length(latent_var_original),length(latent_var_extra))

	## Argument checks
	if (forward_exclude_p_bigger > 1 || forward_exclude_p_bigger <= 0) stop("forward_exclude_p_bigger must be between 0 and 1 (Set to 1 to ignore p-values in forward step)")
	if (backward_exclude_p_smaller >= 1 || backward_exclude_p_smaller < 0) stop("backward_exclude_p_smaller must be between 0 and 1 (Set to 0 to ignore p-values in backward step)")
	if (search_criterion=="AIC") string_choice="lowest AIC"
	else if (search_criterion=="BIC") string_choice="lowest BIC"
	else if (search_criterion=="cv") string_choice="lowest cross-validation error"
	else if (search_criterion=="cv_Huber") string_choice="lowest cross-validation Huber error"
	else if (search_criterion=="cv_L1") string_choice="lowest cross-validation L1-norm error"
	else if (search_criterion=="cv_AUC") string_choice="biggest cross-validated area under the curve"
	else stop("Not a valid search_criterion, use: AIC, BIC, cv, cv_Huber, cv_L1 or cv_AUC")
	# Must not depend on `print`: the AUC criterion always needs classification
	if (search_criterion=="cv_AUC") classification=TRUE
	# Checked before latent_var_original is used for anything else
	if (is.null(latent_var_original)) stop("latent_var_original cannot be null. However, if search=i, then you could set latent_var_original[[i]]=NULL.")

	## Local helpers
	uses_AIC = exclude_worse_AIC || search_criterion=="AIC"
	is_cv = search_criterion %in% c("cv", "cv_AUC", "cv_Huber", "cv_L1")
	# Describes which variables a forward step is allowed to add
	say_forward_rule = function(){
		if (forward_exclude_p_bigger < 1 && uses_AIC) cat(paste0("Keeping only variables with p-values smaller than ", forward_exclude_p_bigger, " and which inclusion decrease the AIC\n"))
		else if (forward_exclude_p_bigger < 1) cat(paste0("Keeping only variables with p-values smaller than ", forward_exclude_p_bigger,"\n"))
		else if (uses_AIC) cat(paste0("Keeping only variables which inclusion decrease the AIC\n"))
	}
	# Describes which variables a backward step is allowed to drop
	# (a threshold of 0 means that p-values are ignored)
	say_backward_rule = function(){
		if (backward_exclude_p_smaller > 0 && uses_AIC) cat(paste0("Dropping only variables with p-values bigger than ", backward_exclude_p_smaller, " and which removal decrease the AIC\n"))
		else if (backward_exclude_p_smaller > 0) cat(paste0("Dropping only variables with p-values bigger than ", backward_exclude_p_smaller,"\n"))
		else if (uses_AIC) cat(paste0("Dropping only variables which removal decrease the AIC\n"))
	}
	# Speed recommendations when a cross-validation criterion is used
	say_cv_note_forward = function(){
		if (is_cv && (!exclude_worse_AIC || forward_exclude_p_bigger > .2)) cat("Note : We recommend using exclude_worse_AIC=TRUE and forward_exclude_p_bigger <= .20 to reduce the amount of cross-validations needed and to make the algorithm much faster.\n")
	}
	say_cv_note_backward = function(){
		if (is_cv && (!exclude_worse_AIC || backward_exclude_p_smaller < .01)) cat("Note : We recommend using exclude_worse_AIC=TRUE and backward_exclude_p_smaller >= .01 to reduce the amount of cross-validations needed and to make the algorithm much faster.\n")
	}
	# Announces what is searched; `label` is "Forward", "Backward" or "Bidirectional"
	say_search_target = function(label){
		if (search==0) cat(paste0(label, " search of the elements from all latent variables to find the model with the ", string_choice,"\n"))
		else cat(paste0(label, " search of the elements from ", names(latent_var_original)[search]," to find the model with the ", string_choice,"\n"))
	}
	# Same rows as x but without any column
	empty_like = function(x) x[, -seq_len(NCOL(x)), drop=FALSE]
	# Latent variable searched after `current` when cycling through all of them
	next_latent_var = function(current) if (current != k) current + 1 else 1
	# Fits the model with the given latent variables and starting points
	fit_model = function(latent_var, start) IMLEGIT(data=data, latent_var=latent_var, formula=formula, start_latent_var=start, eps=eps, maxiter=maxiter, family=family, print=FALSE)
	# Uses the coefficients of `fit` as the next starting points
	refresh_start = function(start, fit){
		for (j in seq_len(k)) start[[j]] = stats::coef(fit$fit_latent_var[[j]])
		start
	}
	# One forward / backward step on latent variable `which_var`; the NULL checks
	# in the search loops below treat a NULL return as "nothing was added/dropped"
	try_forward = function(fit, current, toadd, which_var, start, empty_start) forward_step_IM(empty_start_dataset=empty_start, fit=fit, data=data, formula=formula, interactive_mode=interactive_mode, latent_var_current=current, latent_var_toadd=toadd, search=which_var, search_criterion=search_criterion, p_threshold = forward_exclude_p_bigger, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start, eps=eps, maxiter=maxiter, family=family, seed=seed, print=print)
	try_backward = function(fit, current, dropped, which_var, start) backward_step_IM(fit=fit, data=data, formula=formula, interactive_mode=interactive_mode, latent_var_current=current, latent_var_dropped=dropped, search=which_var, search_criterion=search_criterion, p_threshold = backward_exclude_p_smaller, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start, eps=eps, maxiter=maxiter, family=family, seed=seed, print=print)

	## Data preparation
	# Retaining only the needed variables from the dataset (the latent variables need to be set for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	for (j in seq_len(k)) data[,names(latent_var_original)[j]] = 0
	data = stats::model.frame(formula, data=data, na.action=stats::na.pass)
	# Rows without missing values in the data and in every latent variable dataset
	comp = rep(TRUE, NROW(data))
	for (j in seq_len(k)){
		if (!is.null(latent_var_original[[j]])) comp = comp & stats::complete.cases(data, latent_var_original[[j]])
		if (!is.null(latent_var_extra) && !is.null(latent_var_extra[[j]])) comp = comp & stats::complete.cases(data, latent_var_extra[[j]])
	}
	if (remove_miss){
		data = data[comp,, drop=FALSE]
		for (j in seq_len(k)){
			if (!is.null(latent_var_original[[j]])) latent_var_original[[j]] = latent_var_original[[j]][comp,, drop=FALSE]
			if (!is.null(latent_var_extra)){
				if (!is.null(latent_var_extra[[j]])) latent_var_extra[[j]] = latent_var_extra[[j]][comp,, drop=FALSE]
			}
		}
		if (dim(data)[1] <= 0) stop("no valid observation without missing values")
	}
	else if (sum(comp)!=length(comp) && print) cat("Note: Missing data was found. This will increase computing time in forward steps \n and could lead to an incorrect subset of variable if the sample size change too much. \n")

	if (interactive_mode && print) cat("<<~ Interative mode enabled ~>>\n")

	# A list of k NULL elements instead of NULL
	if (is.null(latent_var_extra)) latent_var_extra = vector("list", k)

	# empty_start_dataset: the searched latent variable starts without any element
	empty_start_dataset = FALSE
	for (j in seq_len(k)){
		if (is.null(latent_var_original[[j]]) || NCOL(latent_var_original[[j]])==0){
			if (j != search) stop("If search=i, you can set latent_var_original[[i]]=NULL but not latent_var_original[[j]]=NULL. When search=0, you must start with at least an element in every latent variable.")
			else if (search_type=="bidirectional-backward" || search_type=="backward") stop ("If search=i, you can normally set latent_var_original[[i]]=NULL but not with backward search because you must start from somewhere before dropping variables.")
			else empty_start_dataset = TRUE
			# Make an empty dataframe instead of NULL
			latent_var_original[[j]] = empty_like(latent_var_extra[[j]])
		}
		if (is.null(latent_var_extra[[j]]) || NCOL(latent_var_extra[[j]])==0){
			# Make an empty dataframe instead of NULL
			latent_var_extra[[j]] = empty_like(latent_var_original[[j]])
		}
	}

	# Setting up initial start
	if (is.null(start_latent_var)){
		start_latent_var = vector("list", k)
		for (j in seq_len(k)){
			# NOTE(review): assigning c() (i.e. NULL) with [[<- removes the list element
			# instead of storing NULL; kept unchanged because the step functions
			# consuming start_latent_var are not visible from here.
			if (empty_start_dataset && search==j) start_latent_var[[j]]=c()
			else start_latent_var[[j]] = rep(1/NCOL(latent_var_original[[j]]),NCOL(latent_var_original[[j]]))
		}
	}
	fit = NULL
	# Overall convergence; reset to FALSE by the search that is actually run, so
	# that no warning is raised when search_type matches none of the branches
	conv = TRUE

	if (search_type == "forward"){
		if (print){
			say_forward_rule()
			say_cv_note_forward()
			say_search_target("Forward")
		}
		latent_var_current = latent_var_original
		if (!empty_start_dataset){
			# Original model
			fit = fit_model(latent_var_current, start_latent_var)
			start_latent_var = refresh_start(start_latent_var, fit)
		}
		latent_var_toadd = latent_var_extra
		if (print) cat("\n")

		if (search == 0){
			latent_var_conv = rep(FALSE, k)
			search_current = 1
		}
		else search_current = search
		conv = FALSE

		for (step in seq_len(max_steps)){
			if (print) cat(paste0("[Iteration: ",step,"]\n"))
			results = try_forward(fit, latent_var_current, latent_var_toadd, search_current, start_latent_var, empty_start_dataset)
			if (is.null(results)){
				if (search == 0){
					latent_var_conv[search_current] = TRUE
					if (sum(latent_var_conv)==k) conv = TRUE
					search_current = next_latent_var(search_current)
				}
				else conv = TRUE
				if (conv) break
			}
			else{
				# If this one didn't converge then all others have to be retried again since things can change
				if (search == 0) latent_var_conv = rep(FALSE, k)
				# Resetting parameters based on iteration results
				empty_start_dataset = FALSE
				fit = results$fit
				start_latent_var = refresh_start(start_latent_var, fit)
				latent_var_current=results$latent_var_current
				latent_var_toadd=results$latent_var_toadd
			}
		}
	}
	if (search_type == "backward"){
		if (print){
			say_backward_rule()
			say_cv_note_backward()
			say_search_target("Backward")
		}
		latent_var_current = latent_var_original
		# Original model
		fit = fit_model(latent_var_current, start_latent_var)
		start_latent_var = refresh_start(start_latent_var, fit)
		# Creating empty data frames of same size as the other data frames
		latent_var_dropped = latent_var_original
		for (j in seq_len(k)) latent_var_dropped[[j]] = empty_like(latent_var_original[[j]])

		if (print) cat("\n")

		if (search == 0){
			latent_var_conv = rep(FALSE, k)
			search_current = 1
		}
		else search_current = search
		conv = FALSE

		for (step in seq_len(max_steps)){
			if (print) cat(paste0("[Iteration: ",step,"]\n"))
			results = try_backward(fit, latent_var_current, latent_var_dropped, search_current, start_latent_var)
			if (is.null(results)){
				if (search==0){
					latent_var_conv[search_current] = TRUE
					if (sum(latent_var_conv)==k) conv = TRUE
					search_current = next_latent_var(search_current)
				}
				else conv = TRUE
				if (conv) break
			}
			else{
				# If this one didn't converge then all others have to be retried again since things can change
				if (search == 0) latent_var_conv = rep(FALSE, k)
				# Resetting parameters based on iteration results
				fit = results$fit
				start_latent_var = refresh_start(start_latent_var, fit)
				latent_var_current=results$latent_var_current
				latent_var_dropped=results$latent_var_dropped
				# Nothing more can be dropped once a single element remains
				if (NCOL(latent_var_current[[search_current]])==1){
					if (search == 0){
						latent_var_conv[search_current] = TRUE
						if (sum(latent_var_conv) == k) conv = TRUE
						search_current = next_latent_var(search_current)
					}
					else conv = TRUE
					if (conv) break
				}
			}
		}
	}
	if (search_type == "bidirectional-forward" || search_type == "bidirectional-backward"){
		if (print){
			say_forward_rule()
			say_backward_rule()
			say_cv_note_forward()
			say_search_target("Bidirectional")
		}
		latent_var_current = latent_var_original
		if (search_type == "bidirectional-forward"){
			if (!empty_start_dataset){
				# Original model
				fit = fit_model(latent_var_current, start_latent_var)
				start_latent_var = refresh_start(start_latent_var, fit)
			}
			latent_var_toadd_ordrop = latent_var_extra
			first_direction="forward"
			forward_failed=FALSE
			# Count as failed because we can't backward if forward doesn't work
			backward_failed=TRUE
		}
		else{
			# Original model
			fit = fit_model(latent_var_current, start_latent_var)
			start_latent_var = refresh_start(start_latent_var, fit)
			# Creating empty data frames of same size as the other data frames
			latent_var_toadd_ordrop = latent_var_original
			for (j in seq_len(k)) latent_var_toadd_ordrop[[j]] = empty_like(latent_var_original[[j]])
			first_direction="backward"
			# Count as failed because nothing can be added before something was dropped
			forward_failed=TRUE
			backward_failed=FALSE
		}
		direction = first_direction
		if (print) cat("\n")

		if (search == 0){
			latent_var_conv = rep(FALSE, k)
			search_current = 1
		}
		else search_current = search
		conv = FALSE

		for (step in seq_len(max_steps)){
			if (print) cat(paste0("[Iteration: ",step,"]\n"))
			if (direction=="forward"){
				results = try_forward(fit, latent_var_current, latent_var_toadd_ordrop, search_current, start_latent_var, empty_start_dataset)
				if (is.null(results)) forward_failed=TRUE
				else{
					# If this one didn't converge then all others have to be retried again since things can change
					if (search == 0) latent_var_conv = rep(FALSE, k)
					forward_failed=FALSE
					# Resetting parameters based on iteration results
					empty_start_dataset = FALSE
					fit = results$fit
					start_latent_var = refresh_start(start_latent_var, fit)
					latent_var_current=results$latent_var_current
					latent_var_toadd_ordrop=results$latent_var_toadd
					# Nothing left to add counts as a fail
					if (NCOL(latent_var_toadd_ordrop[[search_current]])==0) forward_failed=TRUE
				}
				direction="backward"
			}
			if (forward_failed && backward_failed){
				if (search == 0){
					# Done with this latent variable, move on to the next one
					backward_failed = FALSE
					forward_failed = FALSE
					latent_var_conv[search_current] = TRUE
					if (sum(latent_var_conv)==k) conv = TRUE
					search_current = next_latent_var(search_current)
					# Count as a fail (checked on the newly selected latent variable)
					if (NCOL(latent_var_current[[search_current]])==1) backward_failed=TRUE
					if (NCOL(latent_var_toadd_ordrop[[search_current]])==0) forward_failed=TRUE
					direction = first_direction
				}
				else conv = TRUE
				if (conv) break
			}
			if (direction=="backward"){
				# Can't backward if only one remaining variable
				if (NCOL(latent_var_current[[search_current]]) > 1){
					results = try_backward(fit, latent_var_current, latent_var_toadd_ordrop, search_current, start_latent_var)
					if (is.null(results)) backward_failed=TRUE
					else{
						# If this one didn't converge then all others have to be retried again since things can change
						if (search == 0) latent_var_conv = rep(FALSE, k)
						backward_failed=FALSE
						# Resetting parameters based on iteration results
						fit = results$fit
						start_latent_var = refresh_start(start_latent_var, fit)
						latent_var_current=results$latent_var_current
						latent_var_toadd_ordrop=results$latent_var_dropped
						# A single remaining element counts as a fail
						if (NCOL(latent_var_current[[search_current]])==1) backward_failed=TRUE
					}
				}
				else{
					backward_failed=TRUE
					if (print) cat(paste0("No element from ", names(latent_var_original)[search_current]," was removed\n"))
				}
				direction="forward"
			}
			if (forward_failed && backward_failed){
				if (search == 0){
					# Done with this latent variable, move on to the next one
					backward_failed = FALSE
					forward_failed = FALSE
					latent_var_conv[search_current] = TRUE
					if (sum(latent_var_conv)==k) conv = TRUE
					search_current = next_latent_var(search_current)
					# Count as a fail (checked on the newly selected latent variable)
					if (NCOL(latent_var_current[[search_current]])==1) backward_failed=TRUE
					if (NCOL(latent_var_toadd_ordrop[[search_current]])==0) forward_failed=TRUE
					direction = first_direction
				}
				else conv = TRUE
				if (conv) break
			}
		}
	}
	# `conv` is only TRUE when a search loop was left through break
	if (!conv) warning("Stepwise search did not reach convergence in max_steps steps. Try increasing max_steps.")
	return(fit)
}


bootstrap_var_select = function(data, formula, boot_iter=1000, boot_size=NULL, boot_group=NULL, latent_var_original=NULL, latent_var_extra=NULL, search_type="bidirectional-forward", search=0, search_criterion="AIC", forward_exclude_p_bigger = .20, backward_exclude_p_smaller = .01, exclude_worse_AIC=TRUE, max_steps = 100, cv_iter=5, cv_folds=10, folds=NULL, Huber_p=1, classification=FALSE, start_latent_var=NULL, eps=.01, maxiter=25, family=gaussian, seed=NULL, progress=TRUE, n_cluster = 1){
	k = max(length(latent_var_original),length(latent_var_extra))
	## Removing missing data and checks
	# Retaining only the needed variables from the dataset (need to set G and E variables for this to work, they will be replaced with their proper values later)
	data=data.frame(data)
	for (i in 1:k) data[,names(latent_var_original)[i]] = 0
	data = stats::model.frame(formula, data=data, na.action=na.pass)
	#  Check for empty latent variable (Note: Probably shouldn't be named empty_start_dataset but empty_start_latent_var althought that would be even more confusing considering start_latent_var)
	if (is.null(latent_var_original)) stop("latent_var_original cannot be null. However, if search=i, then you could set latent_var_original[[i]]=NULL.")
	# Make it to have NULL elements but not be NULL
	comp = rep(TRUE, NROW(data))
	for (i in 1:k){
		if (!is.null(latent_var_original[[i]])) comp = comp & stats::complete.cases(data, latent_var_original[[i]])
		if (!is.null(latent_var_extra) && !is.null(latent_var_extra[[i]])) comp = comp & stats::complete.cases(data, latent_var_extra[[i]])
	}
	if (!is.null(boot_group)) boot_group = boot_group[comp]
	data = data[comp,, drop=FALSE]
	for (i in 1:k){
		if (!is.null(latent_var_original[[i]])) latent_var_original[[i]] = latent_var_original[[i]][comp,, drop=FALSE]
		if (!is.null(latent_var_extra)){
			if (!is.null(latent_var_extra[[i]])) latent_var_extra[[i]] = latent_var_extra[[i]][comp,, drop=FALSE]
		}
	}
	if (dim(data)[1] <= 0) stop("no valid observation without missing values")

	# Making all elements NULL instead
	if (is.null(latent_var_extra)) latent_var_extra = vector("list", k)

	## Bootstrapping
	if (is.null(boot_size)) boot_size = NROW(data)
	# Create list of variable counts
	var_select = vector("list", k)
	for (i in 1:k){
		if (is.null(latent_var_extra[[i]])) latent_var_all = latent_var_original[[i]]
		else if (is.null(latent_var_original[[i]])) latent_var_all = latent_var_extra[[i]]
		else latent_var_all = cbind(latent_var_original[[i]],latent_var_extra[[i]])
		var_select[[i]] = rep(0, NCOL(latent_var_all))
		names(var_select[[i]]) = colnames(latent_var_all)
	}
	# Setting up parallel
	cl <- snow::makeCluster(n_cluster)
	doSNOW::registerDoSNOW(cl)
	if (progress){
		pb = utils::txtProgressBar(max = boot_iter, style = 3)
		progress <- function(n) utils::setTxtProgressBar(pb, n)
		opts <- list(progress = progress)
	}
	else opts <- list()
	# Function for combining results together
	combine_list <- function(x, y) Map("+", x, y)
	# Need to use this "with(c(),CODEHERE)" to prevent R check from returning a "no visible binding for global variable"
	with(c(),{
		var_select <- foreach::foreach(b = 1:boot_iter, .combine=combine_list, .options.snow = opts) %dopar% {
			if (!is.null(seed)) set.seed(seed+b)
			if (is.null(boot_group)) boot = sample(1:NROW(data),size=boot_size,replace=TRUE)
			else{
				boot = c()
				N = NROW(data)
				repeat{
					id_sampled = sample(unique(boot_group),size=1)
					toadd = which(boot_group == id_sampled)
					current_size = length(boot)
					if (abs(N-current_size) > abs(N - current_size - length(toadd))) boot = c(boot, toadd)
					else break
				}
			}
			data_b = data[boot,, drop=FALSE]
			latent_var_original_b = latent_var_original
			latent_var_extra_b = latent_var_extra
			for (i in 1:k){
				if (!is.null(latent_var_original[[i]])) latent_var_original_b[[i]] = latent_var_original_b[[i]][boot,, drop=FALSE]
				if (!is.null(latent_var_extra)){
					if (!is.null(latent_var_extra[[i]])) latent_var_extra_b[[i]] = latent_var_extra_b[[i]][boot,, drop=FALSE]
				}
			}
			results = stepwise_search_IM(data_b, formula, interactive_mode=FALSE, latent_var_original=latent_var_original_b, latent_var_extra=latent_var_extra_b, search_type=search_type, search=search, search_criterion=search_criterion, forward_exclude_p_bigger = forward_exclude_p_bigger, backward_exclude_p_smaller = backward_exclude_p_smaller, exclude_worse_AIC=exclude_worse_AIC, max_steps = max_steps, cv_iter=cv_iter, cv_folds=cv_folds, folds=folds, Huber_p=Huber_p, classification=classification, start_latent_var=start_latent_var, eps=eps, maxiter=maxiter, family=family, seed=NULL, print=FALSE, remove_miss=TRUE)
			var_select_current = var_select
			for (i in 1:k){
				kept = names(stats::coef(results$fit_latent_var[[i]]))
				var_select_current[[i]] = names(var_select_current[[i]]) %in%  kept
			}
			return(var_select_current)
		}
		close(pb)
		snow::stopCluster(cl)
	})
	names(var_select) = names(latent_var_original)
	for (i in 1:k){
		if (is.null(latent_var_extra[[i]])) latent_var_all = latent_var_original[[i]]
		else if (is.null(latent_var_original[[i]])) latent_var_all = latent_var_extra[[i]]
		else latent_var_all = cbind(latent_var_original[[i]],latent_var_extra[[i]])
		names(var_select[[i]]) = colnames(latent_var_all)
		var_select[[i]] = sort(var_select[[i]], decreasing = TRUE)
		var_select[[i]] = var_select[[i]]/boot_iter
	}
	cat("\nDone\n")
	return(var_select)
}