#'@rdname CST_Analogs
#'@title Downscaling using Analogs based on coarse scale fields.
#' 
#'@author J. Ramon, \email{jaumeramong@gmail.com}
#'@author E. Duzenli, \email{eren.duzenli@bsc.es}
#'
#'@description This function performs a downscaling using Analogs. To compute 
#'the analogs given a coarse-scale field, the function looks for days with similar conditions 
#'in the historical observations. The analogs function determines the N best analogs based 
#'on Euclidian distance, distance correlation, or Spearman's correlation metrics. To downscale 
#'a local-scale variable, either the variable itself or another large-scale variable 
#'can be utilized as the predictor. In the first scenario, analogs are examined between 
#'the observation and model data of the same local-scale variable. In the latter scenario, 
#'the function identifies the day in the observation data that closely resembles 
#'the large-scale pattern of interest in the model. When it identifies the date of 
#'the best analog, the function extracts the corresponding local-scale variable for that day 
#'from the observation of the local scale variable. The used local-scale and large-scale 
#'variables can be retrieved from independent regions. The input data for the first case must 
#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'exp' are the 
#'required input fields. Users can perform the downscaling process over the subregions 
#'that can be identified through the 'region' argument, instead of focusing 
#'on the entire area of the loaded data.
#'The search of analogs must be done in the longest dataset possible, but might 
#'require high-memory computational resources. This is important since it is 
#'necessary to have a good representation of the possible states of the field in 
#'the past, and therefore, to get better analogs. The function can also look for 
#'analogs within a window of D days, but it is the user who has to define that window. 
#'Otherwise, the function will look for analogs in the whole dataset. This function 
#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal 
#'and decadal predictions) but can admit climate projections or reanalyses. It does 
#'not have constraints of specific region or variables to downscale.
#'
#'@param exp an 's2dv_cube' object with named dimensions containing the experimental field 
#'on the coarse scale for the variable targeted for downscaling (in case obsL is not provided) 
#'or for the large-scale variable used as the predictor (if obsL is provided).
#'The object must have, at least, the dimensions latitude, longitude, start date and time.
#'The object is expected to be already subset for the desired region. Data can be in one 
#'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct
#'results in the latter case, the borders of the region should be specified in the parameter
#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data. 
#'However, if forecast data is provided, the loocv_window parameter should be selected as FALSE.
#'@param obs an 's2dv_cube' object with named dimensions containing the observational field 
#'for the variable targeted for downscaling. The object must have, at least, the dimensions 
#'latitude, longitude, start date and either time or window. 
#'The object is expected to be already subset for the desired region. 
#'@param obsL an 's2dv_cube' object with named dimensions containing the observational 
#'field of the large-scale variable. The object must have, at least, the dimensions latitude, 
#'longitude, start date and either time or window. The object is expected to be already 
#'subset for the desired region.
#'@param grid_exp a character vector with a path to an example file of the exp  
#'data. It can be either a path to another NetCDF file which to read the target grid from 
#'(a single grid must be defined in such file) or a character vector indicating the 
#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
#'@param nanalogs an integer indicating the number of analogs to be searched
#'@param fun_analog a function to be applied over the found analogs. Only these options
#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), 
#'the function returns the found analogs.
#'@param lat_dim a character vector indicating the latitude dimension name in the element
#''data' in exp and obs. Default set to "lat".
#'@param lon_dim a character vector indicating the longitude dimension name in the element
#''data' in exp and obs. Default set to "lon".
#'@param sdate_dim a character vector indicating the start date dimension name in the
#'element 'data' in exp and obs. Default set to "sdate".
#'@param time_dim a character vector indicating the time dimension name in the element
#''data' in exp and obs. Default set to "time". 
#'@param member_dim a character vector indicating the member dimension name in the element
#''data' in exp and obs. Default set to "member". 
#'@param metric a character vector to select the analog specification method. Only these 
#'options are valid: "dist" (i.e., Euclidean distance), "dcor" (i.e., distance correlation) 
#'or "cor" (i.e., Spearman's correlation). The default metric is "dist".
#'@param region a numeric vector indicating the borders of the downscaling region.
#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
#'to the left border, while lonmax refers to the right border. latmin indicates the lower
#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
#'uses the full obs grid as the downscaling region.
#'@param return_indices a logical vector indicating whether to return the indices of the
#'analogs together with the downscaled fields. Default to FALSE.
#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
#''window'. It indicates whether to apply leave-one-out cross-validation in the creation 
#'of the window. In this procedure, all data from the corresponding year are excluded 
#'(e.g., all days from that year) so that the analogs are selected only from the remaining years.
#'It is recommended to be set to TRUE. Default to TRUE.
#'@param ncores an integer indicating the number of cores to use in parallel computation. 
#'The default value is NULL.
#'@import CSTools
#'@return A list with the elements 'exp' and 'obs', both of them 's2dv_cube' objects. In 
#''exp', the element 'data' contains the downscaled field, and the latitude and longitude 
#'coordinates are replaced by the downscaled ones. 'obs' contains the observations returned 
#'by Analogs() together with the same coordinates. If fun_analog is set to NULL
#'(default), the output array in 'data' also contains the dimension 'analogs' with the best 
#'analog days. If return_indices is TRUE, the list also contains the element 'ind' with the 
#'indices of the analogs as returned by Analogs().
#'@examples
#'exp <- rnorm(15000)
#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time  = 30)
#'exp_lons <- 1:5
#'exp_lats <- 1:4
#'obs <- rnorm(27000)
#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time  = 30)
#'obs_lons <- seq(0,6, 6/14)
#'obs_lats <- seq(0,6, 6/11)
#'exp <- CSTools::s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons))
#'obs <- CSTools::s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons))
#'if (Sys.which("cdo") != "") {
#'downscaled_field <- CST_Analogs(exp = exp, obs = obs, grid_exp = 'r360x180')
#'}
#'@export
CST_Analogs <- function(exp, obs, obsL = NULL, grid_exp, nanalogs = 3, 
                        fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", 
                        time_dim = "time", member_dim = "member", metric = "dist", region = NULL, 
                        return_indices = FALSE, loocv_window = TRUE, ncores = NULL) {

  # Every input field must be an s2dv_cube object; 'obsL' is optional
  if (!inherits(obs, 's2dv_cube')) {
    stop("Parameter 'obs' must be of the class 's2dv_cube'")
  }

  if (!is.null(obsL) && !inherits(obsL, 's2dv_cube')) {
    stop("Parameter 'obsL' must be of the class 's2dv_cube'")
  }

  if (!inherits(exp, 's2dv_cube')) {
    stop("Parameter 'exp' must be of the class 's2dv_cube'")
  }

  # When 'obsL' is NULL, obsL$data and obsL$coords[[...]] evaluate to NULL as well, which is
  # what Analogs() expects for a missing large-scale observation
  res <- Analogs(exp = exp$data, obs = obs$data,  obsL = obsL$data, 
                 exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], 
                 obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], 
                 obsL_lats = obsL$coords[[lat_dim]], obsL_lons = obsL$coords[[lon_dim]],
                 grid_exp = grid_exp, nanalogs = nanalogs, fun_analog = fun_analog, 
                 lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, 
                 time_dim = time_dim, member_dim = member_dim, metric = metric,
                 region = region, return_indices = return_indices, 
                 loocv_window = loocv_window, ncores = ncores)

  # Modify data, lat and lon in the original s2dv_cube, adding the downscaled data
  exp$data <- res$data
  exp$dims <- dim(exp$data)
  exp$coords[[lon_dim]] <- res$lon
  exp$coords[[lat_dim]] <- res$lat

  obs$data <- res$obs
  obs$dims <- dim(obs$data)
  obs$coords[[lon_dim]] <- res$lon
  obs$coords[[lat_dim]] <- res$lat

  res_s2dv <- list(exp = exp, obs = obs)

  # Hand the analog indices over to the caller; they were previously computed by Analogs()
  # but silently discarded here
  if (isTRUE(return_indices)) {
    res_s2dv$ind <- res$ind
  }

  return(res_s2dv)
}

#'@rdname Analogs
#'@title Downscaling using Analogs based on large scale fields.
#' 
#'@author J. Ramon, \email{jaumeramong@gmail.com}
#'@author E. Duzenli, \email{eren.duzenli@bsc.es}
#'@author Ll. Lledó, \email{llorenc.lledo@ecmwf.int}
#'
#'@description This function performs a downscaling using Analogs. To compute 
#'the analogs given a coarse-scale field, the function looks for days with similar conditions 
#'in the historical observations. The analogs function determines the N best analogs based 
#'on Euclidian distance, distance correlation, or Spearman's correlation metrics. To downscale 
#'a local-scale variable, either the variable itself or another large-scale variable 
#'can be utilized as the predictor. In the first scenario, analogs are examined between 
#'the observation and model data of the same local-scale variable. In the latter scenario, 
#'the function identifies the day in the observation data that closely resembles 
#'the large-scale pattern of interest in the model. When it identifies the date of 
#'the best analog, the function extracts the corresponding local-scale variable for that day 
#'from the observation of the local scale variable. The used local-scale and large-scale 
#'variables can be retrieved from independent regions. The input data for the first case must 
#'include 'exp' and 'obs,' while in the second case, 'obs,' 'obsL,' and 'exp' are the 
#'required input fields. Users can perform the downscaling process over the subregions 
#'that can be identified through the 'region' argument, instead of focusing 
#'on the entire area of the loaded data.
#'The search of analogs must be done in the longest dataset possible, but might 
#'require high-memory computational resources. This is important since it is 
#'necessary to have a good representation of the possible states of the field in 
#'the past, and therefore, to get better analogs. The function can also look for 
#'analogs within a window of D days, but it is the user who has to define that window. 
#'Otherwise, the function will look for analogs in the whole dataset. This function 
#'is intended to downscale climate prediction data (i.e., sub-seasonal, seasonal 
#'and decadal predictions) but can admit climate projections or reanalyses. It does 
#'not have constraints of specific region or variables to downscale.
#'
#'@param exp an array with named dimensions containing the experimental field 
#'on the coarse scale for the variable targeted for downscaling (in case obsL is not provided) 
#'or for the large-scale variable used as the predictor (if obsL is provided).
#'The object must have, at least, the dimensions latitude, longitude, start date and time.
#'The object is expected to be already subset for the desired region. Data can be in one 
#'or two integrated regions, e.g., crossing the Greenwich meridian. To get the correct
#'results in the latter case, the borders of the region should be specified in the parameter
#''region'. See parameter 'region'. Also, the object can be either hindcast or forecast data. 
#'However, if forecast data is provided, the loocv_window parameter should be selected as FALSE.
#'@param obs an array with named dimensions containing the observational field for the variable
#'targeted for downscaling. The object must have, at least, the dimensions latitude, longitude,
#'start date and either time or window. The object is expected to be already subset for the 
#'desired region. Optionally, 'obs' can have the dimension 'window', containing the sampled  
#'fields into which the function will look for the analogs.  
#'Otherwise, the function will look for analogs using all the possible fields contained in obs.
#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must 
#'range from -90 to 90.
#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes 
#'can range from -180 to 180 or from 0 to 360.
#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
#'range from -90 to 90.
#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
#'can range from -180 to 180 or from 0 to 360.
#'@param obsL an array with named dimensions containing the observational 
#'field of the large-scale variable. The object must have, at least, the dimensions latitude, 
#'longitude, start date and either time or window. The object is expected to be already subset for
#'the desired region. Optionally, 'obsL' can have the dimension 'window', containing 
#'the sampled fields into which the function will look for the analogs. 
#'Otherwise, the function will look for analogs using all the possible fields contained in obsL.
#'@param obsL_lats a numeric vector containing the latitude values in 'obsL'. Latitudes must
#'range from -90 to 90.
#'@param obsL_lons a numeric vector containing the longitude values in 'obsL'. Longitudes
#'can range from -180 to 180 or from 0 to 360.
#'@param grid_exp a character vector with a path to an example file of the exp data. 
#'It can be either a path to another NetCDF file which to read the target grid from 
#'(a single grid must be defined in such file) or a character vector indicating the 
#'coarse grid to be passed to CDO, and it must be a grid recognised by CDO.
#'@param nanalogs an integer indicating the number of analogs to be searched.
#'@param fun_analog a function to be applied over the found analogs. Only these options
#'are valid: "mean", "wmean", "max", "min", "median" or NULL. If set to NULL (default), 
#'the function returns the found analogs.
#'@param lat_dim a character vector indicating the latitude dimension name in the element
#''data' in exp and obs. Default set to "lat".
#'@param lon_dim a character vector indicating the longitude dimension name in the element
#''data' in exp and obs. Default set to "lon".
#'@param sdate_dim a character vector indicating the start date dimension name in the
#'element 'data' in exp and obs. Default set to "sdate".
#'@param time_dim a character vector indicating the time dimension name in the element
#''data' in exp and obs. Default set to "time". 
#'@param member_dim a character vector indicating the member dimension name in the element
#''data' in exp and obs. Default set to "member". 
#'@param metric a character vector to select the analog specification method. Only these 
#'options are valid: "dist" (i.e., Euclidean distance), "dcor" (i.e., distance correlation) 
#'or "cor" (i.e., Spearman's correlation). The default metric is "dist".
#'@param region a numeric vector indicating the borders of the downscaling region.
#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
#'to the left border, while lonmax refers to the right border. latmin indicates the lower
#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
#'uses the full obs grid as the downscaling region.
#'@param return_indices a logical vector indicating whether to return the indices of the
#'analogs together with the downscaled fields. The indices refer to the position of the
#'element in the vector time * start_date. If 'obs' contain the dimension 'window', it will
#'refer to the position of the element in the dimension 'window'.  Default to FALSE.
#'@param loocv_window a logical vector only to be used if 'obs' does not have the dimension
#''window'. It indicates whether to apply leave-one-out cross-validation in the creation 
#'of the window. It is recommended to be set to TRUE. Default to TRUE.
#'@param ncores an integer indicating the number of cores to use in parallel computation. 
#'The default value is NULL.
#'@import multiApply
#'@import CSTools
#'@importFrom s2dv InsertDim CDORemap
#'@importFrom ClimProjDiags Subset
#'@importFrom utils head
#'@importFrom stats cor weighted.mean dist
#'
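#'@return A list with the elements 'data', 'obs', 'lat' and 'lon'. 'data' contains the 
#'downscaled field, 'obs' the observations of the target variable (cropped to 'region' if 
#'it is provided), 'lat' the downscaled latitudes, and 'lon' the downscaled longitudes. If 
#'return_indices is TRUE, the list also contains the element 'ind' with the indices of the 
#'analogs. If fun_analog is set to NULL (default), the output array in 'data' also contains 
#'the dimension 'analogs' with the best analog days.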
#'@examples
#'exp <- rnorm(15000)
#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5, time  = 30)
#'exp_lons <- 1:5
#'exp_lats <- 1:4
#'obs <- rnorm(27000)
#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5, time  = 30)
#'obs_lons <- seq(0,6, 6/14)
#'obs_lats <- seq(0,6, 6/11)
#'if (Sys.which("cdo") != "") {
#'downscaled_field <- Analogs(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, 
#'                            obs_lats = obs_lats, obs_lons = obs_lons, grid_exp = 'r360x180')
#' }
#'@export
Analogs <- function(exp, obs, exp_lats = NULL, exp_lons = NULL, obs_lats, obs_lons, 
                    grid_exp, obsL = NULL, obsL_lats = NULL, obsL_lons = NULL, nanalogs = 3, 
                    fun_analog = NULL, lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", 
                    time_dim = "time", member_dim = "member", metric = "dist", region = NULL, 
                    return_indices = FALSE, loocv_window = TRUE, ncores = NULL) {
  # Outline: (1) validate the inputs, (2) choose the training observations ('obsL' if given,
  # 'obs' otherwise) and crop 'obs' to 'region', (3) bring training observations and 'exp' to
  # a common coarse grid, (4) build the 'window' dimension when missing, (5) search the
  # analogs field by field with .analogs(), (6) assemble the output list.

  #-----------------------------------
  # Checkings
  #-----------------------------------
  if (!inherits(grid_exp, 'character')) {
    stop("Parameter 'grid_exp' must be of class 'character'. It can be either a path ",
         "to another NetCDF file which to read the target grid from (a single grid must be ",
         "defined in such file) or a character vector indicating the coarse grid to ",
         "be passed to CDO, and it must be a grid recognised by CDO or a NetCDF file.")
  }

  # is.numeric() accepts both doubles (3) and integers (3L); inherits(x, 'numeric') would
  # reject the latter
  if (!is.numeric(nanalogs)) {
    stop("Parameter 'nanalogs' must be of the class 'numeric'")
  }

  if (!inherits(lat_dim, 'character')) {
    stop("Parameter 'lat_dim' must be of the class 'character'")
  }

  if (!inherits(lon_dim, 'character')) {
    stop("Parameter 'lon_dim' must be of the class 'character'")
  }

  if (!inherits(sdate_dim, 'character')) {
    stop("Parameter 'sdate_dim' must be of the class 'character'")
  }

  if (!inherits(time_dim, 'character')) {
    stop("Parameter 'time_dim' must be of the class 'character'")
  }

  # Do not allow synonyms for lat (latitude), lon (longitude) and time (sdate) dimension names 
  if (!(lon_dim %in% names(dim(exp))) || !(lon_dim %in% names(dim(obs)))) {
    stop("Missing longitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
         "'lon_dim'")
  } 

  if (!(lat_dim %in% names(dim(exp))) || !(lat_dim %in% names(dim(obs)))) {
    stop("Missing latitude dimension in 'exp' and/or 'obs', or does not match the parameter ",
         "'lat_dim'")
  }

  # Start date and time dimensions are only required when the user has not already 
  # arranged 'obs' along a 'window' dimension
  obs_has_window <- "window" %in% names(dim(obs))

  if (!obs_has_window) {
    if (!(sdate_dim %in% names(dim(exp))) || !(sdate_dim %in% names(dim(obs)))) {
      stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
           "'sdate_dim'")
    }

    if (!(time_dim %in% names(dim(exp))) || !(time_dim %in% names(dim(obs)))) {
      stop("Missing time dimension in 'exp' and/or 'obs', or does not match the parameter ",
           "'time_dim'")
    }
  }

  # Ensure we have enough data to interpolate from high-res to coarse grid
  #if ((obs_lats[1] > exp_lats[1]) | (obs_lats[length(obs_lats)] < exp_lats[length(exp_lats)]) |
  #    (obs_lons[1] > exp_lons[1]) | (obs_lons[length(obs_lons)] < exp_lons[length(exp_lons)])) {

  #  stop("There are not enough data in 'obs'. Please to add more latitudes or ",
  #       "longitudes.")
  #}

  # the code is not yet prepared to handle members in the observations
  restore_ens <- FALSE
  if (member_dim %in% names(dim(obs))) {
    if (identical(as.numeric(dim(obs)[member_dim]), 1)) {
      restore_ens <- TRUE
      obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected')
    } else {
      stop("Not implemented for observations with members ('obs' can have 'member_dim', ",
           "but it should be of length = 1).")
    }
  }

  if (!is.null(obsL)) {

    # the code is not yet prepared to handle members in the observations
    if (member_dim %in% names(dim(obsL))) {
      if (identical(as.numeric(dim(obsL)[member_dim]), 1)) {
        obsL <- ClimProjDiags::Subset(x = obsL, along = member_dim, indices = 1, drop = 'selected')
      } else {
        stop("Not implemented for observations with members ('obsL' can have 'member_dim', ",
             "but it should be of length = 1).")
      }
    }

    if (is.null(obsL_lats) || is.null(obsL_lons)) {
      stop("Missing latitudes and/or longitudes for the provided training observations. Please ",
           "provide them with the parametres 'obsL_lats' and 'obsL_lons'")
    }
    
    if (!(lon_dim %in% names(dim(obsL)))) {
      stop("Missing longitude dimension in 'obsL', or does not match the parameter 'lon_dim'")
    }

    if (!(lat_dim %in% names(dim(obsL)))) {
      stop("Missing latitude dimension in 'obsL', or does not match the parameter 'lat_dim'")
    }

    if (!(sdate_dim %in% names(dim(obsL)))) {
      stop("Missing start date dimension in 'obsL', or does not match the parameter 'sdate_dim'")
    }

    if (!(time_dim %in% names(dim(obsL)))) {
      stop("Missing time dimension in 'obsL', or does not match the parameter 'time_dim'")
    }
    
  }

  ## ncores
  # The short-circuit '||' guarantees that the arithmetic tests are only evaluated on a 
  # single, non-missing numeric value, so any invalid input yields the message below
  if (!is.null(ncores)) {
    if (!is.numeric(ncores) || length(ncores) != 1 || is.na(ncores) ||
        ncores %% 1 != 0 || ncores < 1) {
      stop("Parameter 'ncores' must be a positive integer.")
    }
  }
  
  # Select a function to apply to the analogs selected for a given observation
  if (!is.null(fun_analog)) {
    stopifnot(fun_analog %in% c("mean", "wmean", "max", "min", "median"))
  }

  # metric method to be used to specify the analogs
  stopifnot(metric %in% c("cor", "dcor", "dist"))

  # A different number of start dates in exp and obs is taken as a sign that exp is 
  # forecast data, for which leave-one-out cross-validation is switched off
  if (!obs_has_window) {
    if ((dim(exp)[sdate_dim] != dim(obs)[sdate_dim]) && loocv_window) {
      loocv_window <- FALSE
      warning("The lengths of sdate_dim in the data provided through exp and obs are ",
              "not the same. Thus, exp is considered as forecast data, and loocv_window ",
              "is set to FALSE.")
    }
  }

  # Training observations: the large-scale field if provided, the target field otherwise.
  # NOTE: this selection must stay before the cropping of 'obs' below, so that the training
  # field and its coordinates keep the full extent.
  if (!is.null(obsL)) {
    obs_train <- obsL
    obs_train_lats <- obsL_lats
    obs_train_lons <- obsL_lons
  } else {
    obs_train <- obs
    obs_train_lats <- obs_lats
    obs_train_lons <- obs_lons
  } 

  # Correct indices later if cross-validation
  loocv_correction <- !("window" %in% names(dim(obs_train))) && loocv_window

  if (is.null(region)) {
    warning("The borders of the downscaling region have not been provided. ",
            "The function is using the full 'obs' grid as the downscaling region.")
  } else {
    # crop downscaling region, if the argument region is provided.
    # if a border is equally distant from two different grids, 
    # the map will be cropped from the grid having smaller coordinate
    lon_idx <- which.min(abs(region[1] - obs_lons)):which.min(abs(region[2] - obs_lons))
    lat_idx <- which.min(abs(region[3] - obs_lats)):which.min(abs(region[4] - obs_lats))
    obs <- ClimProjDiags::Subset(x = obs, along = list(lon_dim, lat_dim), 
                                 indices = list(lon_idx, lat_idx), drop = 'selected')
    # Keep the returned coordinates in line with the cropped field; from here on obs_lons
    # and obs_lats are only used to build the output
    obs_lons <- obs_lons[lon_idx]
    obs_lats <- obs_lats[lat_idx]
  }

  # The region handed to the interpolation is the exp domain when the user gave none, or
  # when a separate large-scale field is used for training
  if (is.null(region) || !is.null(obsL)) {
    region <- c(exp_lons[1], exp_lons[length(exp_lons)], exp_lats[1], 
                exp_lats[length(exp_lats)])
  }

  if (.check_coords(lat1 = exp_lats, lat2 = obs_train_lats,
                    lon1 = exp_lons, lon2 = obs_train_lons)) {

    # Both fields already share the grid: nothing to interpolate
    obs_interpolated <- NULL
    obs_interpolated$data <- obs_train
    exp_interpolated <- exp

  } else {

    obs_interpolated <- Interpolation(exp = obs_train, lats = obs_train_lats, lons = obs_train_lons,
                                      target_grid = grid_exp, lat_dim = lat_dim, lon_dim = lon_dim,
                                      method_remap = "conservative", region = region,
                                      ncores = ncores)

    # If the coordinates do not match after interpolating 'obs' data, the exp data is interpolated to
    # the same grid to force the matching
    if (!.check_coords(lat1 = as.numeric(obs_interpolated$lat),
                       lat2 = exp_lats,
                       lon1 = as.numeric(obs_interpolated$lon),
                       lon2 = exp_lons)) {
      exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons,
                                        target_grid = grid_exp, lat_dim = lat_dim,
                                        lon_dim = lon_dim, method_remap = "conservative",
                                        region = region, ncores = ncores)$data
    } else {
      exp_interpolated <- exp
    }
  }

  # Create window if user does not have it in the training observations
  if (!("window" %in% names(dim(obs_interpolated$data)))) {
    obs_train_interpolated <- .generate_window(obj = obs_interpolated$data, sdate_dim = sdate_dim,
                                               time_dim = time_dim, loocv = loocv_window,
                                               ncores = ncores)
    if (!is.null(obsL)) {
      if ("window" %in% names(dim(obs))) {
        stop("Either both obs and obsL should include 'window' dimension or none.")
      }
    }
    obs_hres <- .generate_window(obj = obs, sdate_dim = sdate_dim, time_dim = time_dim,
                                 loocv = loocv_window, ncores = ncores)

  } else {
    obs_train_interpolated <- obs_interpolated$data
    if (!is.null(obsL)) {
      if (!("window" %in% names(dim(obs)))) {
        stop("Either both obs and obsL should include 'window' dimension or none.")
      }
    }
    obs_hres <- obs
  }

  #-----------------------------------
  # Reshape train and test
  #-----------------------------------

  RES <- Apply(list(obs_train_interpolated, exp_interpolated, obs_hres),
               target_dims = list(c("window", lat_dim, lon_dim), c(lat_dim, lon_dim),
                                  c("window", lat_dim, lon_dim)),
               fun = function(tr, te, ob) .analogs(train = tr, test = te, obs_hres = ob,
                                                   k = nanalogs, metric = metric, 
                                                   fun_analog = fun_analog),
               ncores = ncores)  ## output1 -> data, output2 -> index, output3 -> metric

  res.data <- RES$output1  

  # restore ensemble dimension in observations if it existed originally
  if (restore_ens) {
    obs <- s2dv::InsertDim(obs, posdim = 1, lendim = 1, name = member_dim)
  }

  res <- list(data = res.data, obs = obs, lon = obs_lons, lat = obs_lats)

  # Return the indices of the best analogs
  if (return_indices) {
    res.ind <- RES$output2

    # If cross-validation has been applied, correct the indices: the window built for 
    # start date s lacks the 'ntimes' positions of that start date, so a position in the
    # reduced window is mapped back to its position in the full time * start_date vector
    if (loocv_correction) {
      nsdates <- dim(res.ind)[names(dim(res.ind)) == sdate_dim]
      ntimes <- dim(res.ind)[names(dim(res.ind)) == time_dim]
      res.ind <- Apply(res.ind, target_dims = c("index", sdate_dim), function(x)   
        as.matrix(sapply(1:nsdates, 
                    function(s) seq(ntimes * nsdates)[ - (ntimes * (s - 1) + 1:ntimes)][x[, s]])),
        output_dims = c("index", sdate_dim), ncores = ncores)$output1
      # With a single analog the names of the 'index' and start date dimensions are 
      # swapped (presumably because sapply() returns a plain vector in that case and 
      # as.matrix() turns it into a one-column matrix -- TODO confirm)
      if (nanalogs == 1) {
        dims <- names(dim(res.ind))
        dims[dims == "index"] <- "tmp"
        dims[dims == sdate_dim] <- "index"
        dims[dims == "tmp"]   <- sdate_dim
        names(dim(res.ind)) <- dims
      }
    }

    # Same element order as before: data, ind, obs, lon, lat
    res <- list(data = res.data, ind = res.ind, obs = obs, lon = obs_lons, lat = obs_lats)
  }

  return(res)
}

# For a single coarse-scale field, find the k best analogs among the training fields and
# return the matching high-resolution observations.
#
# train:      array with dimensions window, lat and lon (in this order)
# test:       array with dimensions lat and lon (in this order), on the grid of 'train'
# obs_hres:   array with dimensions window, lat and lon (in this order); its window 
#             positions are used with the indices found in 'train'
# k:          number of analogs to select
# fun_analog: NULL to return the k analogs, or one of "mean", "wmean", "max", "min", "median"
# metric:     "dist" (smallest Euclidean distance first), "cor" (largest Spearman 
#             correlation first) or "dcor" (largest distance correlation first)
# return_indices: not used; kept so that existing calls stay valid
#
# Returns an unnamed list of three elements: the selected field(s) (lat, lon and, when
# fun_analog is NULL, 'analogs'), the window indices of the analogs (dimension 'index') 
# and the values of the metric (dimension 'metric').
.analogs <- function(train, test, obs_hres, k, fun_analog, metric = NULL, return_indices = FALSE) {
  
  # Number of lats/lons of the high-resolution data
  space_dims_hres <- dim(obs_hres)[c(2, 3)]

  # Nothing can be matched if the target or the whole training set is missing. The NA
  # result keeps the shape of a regular result, 'analogs' dimension included.
  if (all(is.na(test)) || all(is.na(train))) {
    if (is.null(fun_analog)) {
      res <- array(NA, c(space_dims_hres, analogs = k))
    } else {
      res <- array(NA, space_dims_hres)
    }
    res_ind <- array(NA, k)
    names(dim(res_ind)) <- c("index")
    res_metric <- array(NA, k)
    names(dim(res_metric)) <- c("metric")
    return(list(res, res_ind, res_metric))
  }

  # Reformat as (space, window) matrices. Going through matrix() instead of 
  # apply(x, 1, as.vector) keeps a matrix even with one grid point or one window element.
  to_space_by_window <- function(x) t(matrix(as.vector(x), nrow = dim(x)[[1]]))
  train <- to_space_by_window(train)
  obs_hres <- to_space_by_window(obs_hres)
  test <- as.vector(test)

  # Identify and remove NA's: the missing grid points are read from the first window
  # element that is not entirely NA, the missing window elements from the first grid 
  # point that is not entirely NA
  ref_time <- which(!apply(train, 2, function(x) all(is.na(x))))[1]
  ref_space <- which(!apply(train, 1, function(x) all(is.na(x))))[1]
  idx_na_tr <- is.na(train[, ref_time])  # NA in space
  idy_na_tr <- is.na(train[ref_space, ]) # NA in time
  idx_na <- idx_na_tr | is.na(test)
  # drop = FALSE keeps the (window, space) layout when a single grid point is left
  tr_wo_na <- t(train[!idx_na, !idy_na_tr, drop = FALSE])
  te_wo_na <- test[!idx_na]

  # Rank the training fields. order() gives every field its own position, so tied 
  # values lead to different analogs (match() on the sorted values would return the 
  # first tied field repeatedly). Fields with an NA metric are discarded.
  if (metric == "dist") {
    metric_all <- sqrt(rowSums((sweep(tr_wo_na, 2, te_wo_na))^2)) # euc dist
    ranking <- order(metric_all, na.last = NA)
  } else if (metric == "cor") {
    metric_all <- apply(tr_wo_na, 1, function(x) cor(x, te_wo_na, method = "spearman")) # cor 
    ranking <- order(metric_all, decreasing = TRUE, na.last = NA)
  } else if (metric == "dcor") {
    metric_all <- apply(tr_wo_na, 1, function(x) .dcor(x, te_wo_na)) # dcor 
    ranking <- order(metric_all, decreasing = TRUE, na.last = NA)
  } else {
    stop("Parameter 'metric' must be one of 'dist', 'dcor' or 'cor'.")
  }
  idx <- head(ranking, k)
  best_vals <- metric_all[idx]

  if (length(idx) < k) {
    stop("Only ", length(idx), " valid analog candidates were found, but 'k' is ", k, ".")
  }

  # Positions were found after removing the missing window elements: map them back
  if (isTRUE(any(idy_na_tr))) {
    idx <- which(!idy_na_tr)[idx]
  }

  res_ind <- array(idx, k)
  names(dim(res_ind)) <- c("index")
  res_metric <- array(best_vals, c(k))
  names(dim(res_metric)) <- c("metric")
  res <- obs_hres[, idx, drop = FALSE]
  dim(res) <- c(space_dims_hres, analogs = k)

  if (!is.null(fun_analog)) {
    if (fun_analog == "wmean") {
      # Closer analogs weigh more: inverse distance, or the correlation itself
      if (metric == "dist") {
        weight <- 1 / best_vals
      } else {
        weight <- best_vals
      }
      res <- apply(res, c(1, 2), function(x) weighted.mean(x, weight))
    } else if (fun_analog == "min") {
      # "min"/"max" pick the analog with the smallest/largest metric value; array() 
      # keeps both spatial dimensions even if one of them has length 1
      res <- array(res[, , which.min(best_vals)], dim = space_dims_hres)
    } else if (fun_analog == "max") {
      res <- array(res[, , which.max(best_vals)], dim = space_dims_hres)
    } else {
      res <- apply(res, c(1, 2), fun_analog)
    }
  }

  return(list(res, res_ind, res_metric))
}

# Build the 'window' dimension of an array by pooling its time and start date dimensions.
#
# obj:       array with, at least, the dimensions 'sdate_dim' and 'time_dim'
# loocv:     if TRUE, the window built for each start date leaves that start date out
# size:      NULL to pool all the data; otherwise the number of extra time steps taken on 
#            each side, which requires 'obj' to have the dimension 'size_dim'
# size_dim:  dimension whose elements are concatenated along time before cutting the window
# ncores:    passed on to Apply()
#
# Returns the element 'output1' of the Apply() call that creates the 'window' dimension.
.generate_window <- function(obj, sdate_dim, time_dim, loocv, size = NULL, 
                             size_dim = "smonth", ncores = NULL) {

  obj_dims <- dim(obj)
  sdate_idx <- 1:obj_dims[names(obj_dims) == sdate_dim]
  n_time <- obj_dims[names(obj_dims) == time_dim]
  time_idx <- 1:n_time

  #-----------------------------------
  # No size given: the window contains all the data
  #-----------------------------------
  if (is.null(size)) {
    if (loocv) {
      # One column per start date, each holding every time step of all the other start dates
      leave_one_sdate_out <- function(x) {
        sapply(sdate_idx, function(s) as.vector(x[time_idx, -s]))
      }
      pooled <- Apply(obj, target_dims = c(time_dim, sdate_dim),
                      fun = leave_one_sdate_out,
                      output_dims = c("window", sdate_dim), ncores = ncores)
    } else {
      pooled <- Apply(obj, target_dims = c(time_dim, sdate_dim),
                      fun = as.vector, output_dims = "window", ncores = ncores)
    }
    return(pooled$output1)
  }

  #-----------------------------------
  # Size given: the window is cut around the target period
  #-----------------------------------
  # The extra data needed on both sides must come along 'size_dim'
  if (!(size_dim %in% names(obj_dims))) {
    stop("Missing information on which dimension the analog search size  will be expanded.")
  }

  # Concatenate along time the data found along 'size_dim'
  stacked <- Apply(obj, target_dims = list(c(time_dim, size_dim)),
                   fun = as.vector, output_dims = time_dim, ncores = ncores)$output1

  # Rows of the concatenated time axis to keep: the n_time steps that follow the first 
  # n_time ones, widened by 'size' steps on each side
  window_rows <- function() {
    (n_time + min(time_idx) - size):(n_time + max(time_idx) + size)
  }

  if (!loocv) {
    windowed <- Apply(stacked, target_dims = c(time_dim, sdate_dim),
                      fun = function(x) as.vector(x[window_rows(), ]),
                      output_dims = "window", ncores = ncores)
    return(windowed$output1)
  }

  # The start date positions are passed as a second input with its own named dimension, 
  # so that each call receives the start date to leave out
  sdate_arr <- sdate_idx
  dim(sdate_arr) <- length(sdate_arr)
  names(dim(sdate_arr)) <- sdate_dim
  windowed <- Apply(list(stacked, sdate_arr),
                    target_dims = list(c(time_dim, sdate_dim), NULL),
                    fun = function(x, s) as.vector(x[window_rows(), -s]),
                    output_dims = "window", ncores = ncores)

  return(windowed$output1)
}

# Distance correlation between two numeric vectors of the same length.
#
# x, y: numeric vectors; every element is treated as one observation
#
# Returns a single number: sqrt(sum(A * B)) / sqrt(sqrt(sum(A * A)) * sqrt(sum(B * B))),
# where A and B are the double-centred matrices of pairwise distances within x and y.
# The result is NaN when x or y is constant (zero distance variance).
.dcor <- function(x, y) {

  # Pairwise distance matrix with row means and column means removed and the grand mean 
  # added back. This equals H %*% D %*% H with H = diag(n) - 1/n, but needs no matrix
  # product.
  centered_dist <- function(v) {
    d <- as.matrix(dist(matrix(v)))
    d - outer(rowMeans(d), colMeans(d), "+") + mean(d)
  }

  dist_centered_x <- centered_dist(x)
  dist_centered_y <- centered_dist(y)

  # sum(A * B) is the trace of A %*% t(B), obtained in O(n^2) instead of O(n^3).
  # The distance covariance term is clamped at zero to absorb rounding noise.
  dcov_xy <- sqrt(max(sum(dist_centered_x * dist_centered_y), 0))

  # Distance variances
  cov_xx <- sqrt(sum(dist_centered_x * dist_centered_x))
  cov_yy <- sqrt(sum(dist_centered_y * dist_centered_y))

  # Distance correlation
  dcor_xy <- dcov_xy / sqrt(cov_xx * cov_yy)

  return(dcor_xy)
}
 
