g.report.part5 = function(metadatadir=c(),f0=c(),f1=c(),loglocation=c(),
                          includenightcrit=c(),includedaycrit=c(),data_cleaning_file=c(),
                          includedaycrit.part5=2/3,
                          minimum_MM_length.part5=23, week_weekend_aggregate.part5=FALSE,
                          LUX_day_segments=c()) {
  # description: function to load milestone data generated by g.part5 and to merge these into a spreadsheet.
  # No additional analyses are performed here. This function therefore primarily wraps up the output
  # from parallel processed accelerometer files.
  
  # Note: argument includenightcrit is not used anymore and can be deprecated; includedaycrit is only
  # used as a fallback when includedaycrit.part5 is empty (see the line below)
  
  if (length(includedaycrit) != 0 & length(includedaycrit.part5) == 0) includedaycrit.part5 = includedaycrit
  getValidDayIndices = function(x, includedaycrit.part5, window) {
    # Return the row indices of x (one row per window/day) that pass the cleaning criteria.
    #
    # x: data.frame with at least the columns nonwear_perc_day, dur_spt_min, dur_day_min,
    #    and, for window "MM", dur_day_spt_min. Columns ID and window_number are needed
    #    when a data_cleaning_file is used.
    # includedaycrit.part5: minimum required wear during the day, either as a fraction of
    #    the day (0-1) or as a number of hours (>1 up to 25).
    # window: "WW" or "MM"; any other value is rejected with an error.
    #
    # Note: data_cleaning_file and minimum_MM_length.part5 are not arguments of this helper,
    # they are looked up in the enclosing scope.
    if (includedaycrit.part5 >= 0 && includedaycrit.part5 <= 1) { # used as a ratio
      includedaycrit.part5 = includedaycrit.part5 * 100
    } else if (includedaycrit.part5 > 1 && includedaycrit.part5 <= 25) { # used as a number of hours
      includedaycrit.part5 = (includedaycrit.part5 / 24) * 100
    } else if (includedaycrit.part5 < 0) {
      # only a warning: the value is used as-is in the threshold below
      warning("\nNegative value of includedaycrit.part5 is not allowed, please change.")
    } else if (includedaycrit.part5 > 25) {
      # only a warning: the value is used as-is in the threshold below
      warning("\nIncorrect value of includedaycrit.part5, this should be a fraction of the day between zero and one or the number of hours in a day.")
    }
    # maximum percentage of daytime non-wear that is still accepted
    maxpernwday = 100 - includedaycrit.part5
    include_window = rep(TRUE, nrow(x))
    if (length(data_cleaning_file) > 0) { # allow for forced exclusion of windows based on external data_cleaning_file
      DaCleanFile = read.csv(data_cleaning_file)
      days2exclude = which(DaCleanFile$ID %in% x$ID & DaCleanFile$day_part5 %in% x$window_number)
      # iterating over an empty vector is a no-op, so no separate length check is needed
      for (ri in days2exclude) {
        id2remove = DaCleanFile$ID[ri]
        window2remove = DaCleanFile$day_part5[ri]
        include_window[which(x$ID == id2remove & x$window_number == window2remove)] = FALSE
      }
    }
    # Note: Below we intentionally only set a criterion on daytime, because for
    # the night time we only need start and end of the SPT window.
    meets_base_criteria = x$nonwear_perc_day <= maxpernwday &
      x$dur_spt_min > 0 & x$dur_day_min > 0 & include_window
    if (window == "WW") {
      indices = which(meets_base_criteria)
    } else if (window == "MM") {
      # Note: By default for MM analysis only full days are interesting (23 hours for one day in the year)
      indices = which(meets_base_criteria &
                        x$dur_day_spt_min >= (minimum_MM_length.part5 * 60))
    } else {
      # previously this case failed with "object 'indices' not found"
      stop(paste0("\nUnknown window type '", window, "', expected 'WW' or 'MM'."))
    }
    return(indices)
  }
  # Decide whether a report can be generated: the part 5 milestone folder
  # must exist and must contain at least one file.
  ms5.out = "/meta/ms5.out"
  ms5.path = paste0(metadatadir, ms5.out)
  try.generate.report = FALSE # do not run this function if there is no milestone data from g.part5
  if (!file.exists(ms5.path)) {
    warning('Cannot generate part5 report because no part 5 milestone data is available. First run part 5 with argument mode=5.')
  } else if (length(dir(ms5.path)) > 0) {
    try.generate.report = TRUE
  }
  if (try.generate.report == TRUE) {
    #======================================================================
    # loop through meta-files
    fnames.ms5 = list.files(paste0(metadatadir,ms5.out),full.names=TRUE)
    if(f1 > length(fnames.ms5)) f1 = length(fnames.ms5)
    cat(" loading all the milestone data from part 5 this can take a few minutes\n")
    myfun = function(x) {
      # Load one milestone file (expected to hold an object named 'output')
      # and return its content as a matrix. If some, but not all, rows have an
      # empty first cell, those rows and all unnamed columns are dropped.
      load(file = x)
      emptyrows = which(output[, 1] == "")
      n_empty = length(emptyrows)
      if (n_empty > 0 && n_empty < nrow(output)) {
        namedcols = which(colnames(output) != "")
        output = output[-emptyrows, namedcols]
      }
      return(as.matrix(output))
    }
    outputfinal = as.data.frame(do.call(rbind,lapply(fnames.ms5[f0:f1],myfun)),stringsAsFactors=FALSE)
    cut = which(sapply(outputfinal, function(x) all(x=="")) == TRUE) # Find columns filled with missing values which(output[1,] == "" & output[2,] == "")
    if (length(cut) > 0) {
      outputfinal = outputfinal[,-cut]
    }
    # split results to different spreadsheets in order to minimize individual filesize and to ease organising dataset
    uwi = as.character(unique(outputfinal$window))
    uTRLi = as.character(unique(outputfinal$TRLi))
    uTRMi = as.character(unique(outputfinal$TRMi))
    uTRVi = as.character(unique(outputfinal$TRVi))
    usleepparam = as.character(unique(outputfinal$sleepparam))
    # replace NaN by empty cell value
    for (kra in 1:ncol(outputfinal)) {
      krad = which(as.character(outputfinal[,kra]) == "NaN")
      if (length(krad) > 0) {
        outputfinal[krad,kra] = ""
      }
    }
    outputfinal$daytype = 0
    outputfinal$daytype[which(outputfinal$weekday == "Sunday" | outputfinal$weekday == "Saturday")] = "WE"
    outputfinal$daytype[which(outputfinal$weekday == "Monday" | outputfinal$weekday == "Tuesday" |
                                outputfinal$weekday == "Wednesday" | outputfinal$weekday == "Thursday" |
                                outputfinal$weekday == "Friday")] = "WD"
    outputfinal$nonwear_perc_day = as.numeric(outputfinal$nonwear_perc_day)
    outputfinal$nonwear_perc_spt = as.numeric(outputfinal$nonwear_perc_spt)
    outputfinal$dur_spt_min = as.numeric(outputfinal$dur_spt_min)
    outputfinal$dur_day_min = as.numeric(outputfinal$dur_day_min)
    outputfinal$guider = as.character(outputfinal$guider)
    outputfinal$sleeplog_used = as.numeric(outputfinal$sleeplog_used)
    outputfinal$dur_spt_min = as.numeric(outputfinal$dur_spt_min)
    outputfinal$dur_day_min = as.numeric(outputfinal$dur_day_min)
    outputfinal$dur_day_spt_min = as.numeric(outputfinal$dur_day_spt_min)
    # loop to store the various variants of the analysis separately
    cat(" generating csv report for every parameter configurations...\n")
    for (j in 1:length(uwi)) {
      for (h1 in 1:length(uTRLi)) {
        for (h2 in 1:length(uTRMi)) {
          for (h3 in 1:length(uTRVi)) {
            for (h4 in 1:length(usleepparam)) {
              cat(paste0(" ",uwi[j],"-",uTRLi[h1],"-",uTRMi[h2],"-",uTRVi[h3],"-",usleepparam[h4]))
              seluwi = which(as.character(outputfinal$window) == uwi[j] &
                               as.character(outputfinal$TRLi) == uTRLi[h1] &
                               as.character(outputfinal$TRMi) == uTRMi[h2] &
                               as.character(outputfinal$TRVi) == uTRVi[h3] &
                               as.character(outputfinal$sleepparam) == usleepparam[h4])
              # store spreadsheet
              if (nrow(outputfinal[seluwi,]) == 0) {
                cat("report not stored, because no results available")
              } else {
                CN = colnames(outputfinal)
                outputfinal2 = outputfinal
                colnames(outputfinal2) = CN
                delcol = which(colnames(outputfinal2) == "window" | colnames(outputfinal2) == "TRLi" |
                                 colnames(outputfinal2) == "TRMi" | colnames(outputfinal2) == "TRVi" |
                                 colnames(outputfinal2) == "sleepparam")
                outputfinal2 = outputfinal2[,-delcol]
                OF3 = outputfinal2[seluwi,]
                OF3 = as.data.frame(OF3, stringsAsFactors = TRUE)
                #-------------------------------------------------------------
                # store all summaries in csv files without cleaning criteria
                write.csv(OF3,paste(metadatadir,"/results/QC/part5_daysummary_full_",
                                    uwi[j],"_L",uTRLi[h1],"M",uTRMi[h2],"V",uTRVi[h3],
                                    "_",usleepparam[h4],".csv",sep=""),row.names=FALSE)
                # store all summaries in csv files with cleaning criteria
                validdaysi = getValidDayIndices(OF3,includedaycrit.part5, window = uwi[j])
                write.csv(OF3[validdaysi,],paste(metadatadir,"/results/part5_daysummary_",
                                                 uwi[j],"_L",uTRLi[h1],"M",uTRMi[h2],"V",
                                                 uTRVi[h3],"_",usleepparam[h4],".csv",sep=""), row.names=FALSE)
                #------------------------------------------------------------------------------------
                #also compute summary per person
                agg_plainNweighted = function(df,filename="filename",daytype="daytype") {
                  # function to take both the weighted (by weekday/weekendday) and plain average of all numeric variables
                  # df: input data.frame (OF3 outside this function), one row per day/window; must hold
                  #     the columns 'filename' and 'daytype' (values "WD"/"WE" are given weights below)
                  # filename: name of the column that identifies the recording; used below to keep/skip
                  #     that column when selecting by class
                  # daytype: not referenced in the body, the column df$daytype is used directly
                  # Returns a data.frame G with one row per filename holding plain means (suffix _pla),
                  # weighted means (suffix _wei) and per day type means (suffix _WD and _WE).
                  #
                  # Columns listed in ignorevar are never coerced to numeric in the loop below
                  ignorevar = c("daysleeper","cleaningcode","night_number","sleeplog_used","ID","acc_available","window_number",
                                "boutcriter.mvpa", "boutcriter.lig", "boutcriter.in", "bout.metric")
                  for (ee in 1:ncol(df)) { # make sure that numeric columns have class numeric
                    # only the first (at most) 30 rows are used to test whether a column can be read as numeric
                    nr = nrow(df)
                    if (nr > 30) nr = 30
                    # NOTE(review): warnings are switched off via options(warn=-1) and afterwards set
                    # to 0, which does not restore a different warn level set by the caller
                    options(warn=-1)
                    trynum = as.numeric(as.character(df[1:nr,ee]))
                    options(warn=0)
                    # coerce when at least one tested value is numeric and the column is not in ignorevar
                    if (length(which(is.na(trynum) == TRUE)) != nr &
                        length(which(ignorevar == names(df)[ee])) == 0) {
                      options(warn=-1)
                      class(df[,ee]) = "numeric"
                      options(warn=0)
                    }
                  }
                  # mean that ignores NA; when the mean is NA (e.g. for non-numeric input)
                  # the first value of x is returned instead
                  plain_mean = function(x) {
                    options(warn=-1)
                    plain_mean = mean(x,na.rm=TRUE)
                    options(warn=0)
                    if (is.na(plain_mean) == TRUE) {
                      plain_mean = x[1]
                    }
                    return(plain_mean)
                  }
                  # aggregate across all days
                  PlainAggregate = aggregate.data.frame(df,by=list(df$filename),FUN=plain_mean)
                  PlainAggregate = PlainAggregate[,-1] # drop the grouping column added by aggregate
                  # aggregate per day type (weekday or weekenddays)
                  AggregateWDWE = aggregate.data.frame(df,by=list(df$filename,df$daytype),plain_mean)
                  AggregateWDWE = AggregateWDWE[,-c(1:2)] # drop the two grouping columns added by aggregate
                  # Add counted number of days for Gini, Cov, alpha Fragmentation variables, because
                  # days are dropped if there are not enough fragments:
                  vars_with_mininum_Nfrag = c("FRAG_Gini_dur_PA_day", "FRAG_CoV_dur_PA_day",
                                              "FRAG_alpha_dur_PA_day", "FRAG_Gini_dur_IN_day",
                                              "FRAG_CoV_dur_IN_day")
                  vars_with_mininum_Nfrag_i = which(vars_with_mininum_Nfrag %in% colnames(df) == TRUE)
                  if (length(vars_with_mininum_Nfrag_i) > 0) {
                    # the count of non-NA days is taken from the first of these variables present in df
                    varname_minfrag = vars_with_mininum_Nfrag[vars_with_mininum_Nfrag_i[1]]
                    DAYCOUNT_Frag_Multiclass = aggregate.data.frame(df[,varname_minfrag],
                                                                    by=list(df$filename,df$daytype),
                                                                    FUN=function(x) length(which(is.na(x) == FALSE)))
                    colnames(DAYCOUNT_Frag_Multiclass)[1:2] = c("filename","daytype")
                    colnames(DAYCOUNT_Frag_Multiclass)[3] = "Nvaliddays_AL10F" # AL10F, abbreviation for: at least 10 fragments
                    AggregateWDWE = merge(AggregateWDWE, DAYCOUNT_Frag_Multiclass, by.x = c("filename","daytype"))
                  }
                  # presumably len is set to NULL here to avoid an undefined-variable note
                  # for its use inside the data.table call below -- TODO confirm
                  len = NULL
                  # column len holds the weight of each day type row in the weighted mean
                  AggregateWDWE$len <- 0
                  AggregateWDWE$len[which(as.character(AggregateWDWE$daytype) == "WD")] = 5 #weighting of weekdays
                  AggregateWDWE$len[which(as.character(AggregateWDWE$daytype) == "WE")] = 2 #weighting of weekend days
                  # keep only the numeric columns and the filename column for the weighted mean
                  dt <- data.table::as.data.table(AggregateWDWE[,which(lapply(AggregateWDWE, class)=="numeric" |
                                                                         names(AggregateWDWE) == filename)])
                  options(warn=-1)
                  .SD <- .N <- count <- a <- NULL
                  # weighted mean per filename of every remaining column, weighted by len
                  WeightedAggregate <- dt[,lapply(.SD,weighted.mean,w=len,na.rm=TRUE),by=list(filename)]
                  options(warn=0)
                  LUXmetrics = c("above1000", "timeawake", "mean", "imputed", "ignored")
                  add_missing_LUX = function(x, LUX_day_segments, weeksegment=c(), LUXmetrics) {
                    # missing columns, add these:
                    # x: aggregate to extend; LUX_day_segments: segment boundaries used to build the
                    # expected column names; weeksegment: optional suffix ("WD"/"WE") of those names
                    NLUXseg = length(LUX_day_segments)
                    if (length(weeksegment) > 0) {
                      LUX_segment_vars_expected = paste0("LUX_",LUXmetrics,"_",LUX_day_segments[1:(NLUXseg-1)],"-",LUX_day_segments[2:(NLUXseg)],"hr_day_",weeksegment)
                    } else {
                      LUX_segment_vars_expected = paste0("LUX_",LUXmetrics,"_",LUX_day_segments[1:(NLUXseg-1)],"-",LUX_day_segments[2:(NLUXseg)],"hr_day")
                    }
                    # NOTE(review): dummy_df has NLUXseg-1 columns, while the expected names come from
                    # paste0() recycling LUXmetrics (5 values) against the segments; these lengths
                    # look like they can differ, in which case the colnames assignment fails -- confirm
                    dummy_df = as.data.frame(matrix(NaN,1, (NLUXseg-1)))
                    colnames(dummy_df) = LUX_segment_vars_expected
                    # merge with the one-row NaN dummy, keeping all rows of x
                    x = as.data.frame(merge(x, dummy_df, all.x = T))
                    # re-order
                    current_location = which(colnames(x) %in% LUX_segment_vars_expected == TRUE)
                    # NOTE(review): neworder is computed but not used afterwards
                    neworder = sort(colnames(x)[current_location])
                    # move the LUX segment columns to the end, in the expected order
                    x = cbind(x[,-current_location], x[,LUX_segment_vars_expected])
                    return(x)
                  }
                  # collect the names of all LUX columns present in the weighted aggregate
                  LUX_segment_vars = c()
                  for (li in 1:length(LUXmetrics)) {
                    LUX_segment_vars = c(LUX_segment_vars, grep(pattern = paste0("LUX_",LUXmetrics[li]),x = colnames(WeightedAggregate), value=TRUE))
                  }
                  # NOTE(review): the meaning of the threshold 24 is not visible here -- confirm
                  if (length(LUX_segment_vars) > 0 & length(LUX_segment_vars) < 24 & length(LUX_day_segments) > 0) {
                    WeightedAggregate = add_missing_LUX(WeightedAggregate, LUX_day_segments, weeksegment=c(), LUXmetrics = LUXmetrics)
                  }
                  # merge them into one output data.frame (G)
                  # NOTE(review): unlike the loop above, each iteration here overwrites LUX_segment_vars,
                  # so after the loop it only flags columns matching the last metric ("LUX_ignored");
                  # confirm whether accumulating over all metrics was intended
                  LUX_segment_vars = c()
                  for (li in 1:length(LUXmetrics)) {
                    LUX_segment_vars = colnames(PlainAggregate) %in% grep(x = colnames(PlainAggregate), pattern=paste0("LUX_",LUXmetrics[li]), value=TRUE)
                  }
                  # charcol: non-numeric columns (other than filename and flagged LUX columns)
                  # numcol: numeric columns plus flagged LUX columns
                  charcol = which(lapply(PlainAggregate, class) != "numeric" & names(PlainAggregate) != filename & !(LUX_segment_vars))
                  numcol = which(lapply(PlainAggregate, class) == "numeric" | LUX_segment_vars)
                  WeightedAggregate = as.data.frame(WeightedAggregate, stringsAsFactors = TRUE)
                  G = base::merge(PlainAggregate,WeightedAggregate,by="filename",all.x=TRUE)
                  # columns present in both inputs carry the suffixes .x (plain) and .y (weighted)
                  # after the merge; rename them below
                  p0b = paste0(names(PlainAggregate[,charcol]),".x")
                  p1 = paste0(names(PlainAggregate[,numcol]),".x")
                  p2 = paste0(names(PlainAggregate[,numcol]),".y")
                  # non-numeric columns: strip the .x suffix again
                  for (i in 1:length(p0b)) {
                    names(G)[which(names(G)==p0b[i])] = paste0(names(PlainAggregate[,charcol])[i])
                  }
                  # numeric columns from the plain aggregate get suffix _pla
                  for (i in 1:length(p1)) {
                    names(G)[which(names(G)==p1[i])] = paste0(names(PlainAggregate[,numcol])[i],"_pla")
                  }
                  # numeric columns from the weighted aggregate get suffix _wei
                  for (i in 1:length(p2)) {
                    names(G)[which(names(G)==p2[i])] = paste0(names(PlainAggregate[,numcol])[i],"_wei")
                  }
                  # expand output with weekday (WD) and weekend (WE) day aggregates
                  for (weeksegment in c("WD", "WE")) {
                    temp_aggregate = AggregateWDWE[which(AggregateWDWE$daytype==weeksegment),]
                    charcol = which(lapply(temp_aggregate, class) != "numeric" & names(temp_aggregate) != filename)
                    numcol = which(lapply(temp_aggregate, class) %in% c("numeric", "integer") == TRUE)
                    # numeric/integer columns get the day type as suffix; only those and filename are kept
                    names(temp_aggregate)[numcol] = paste0(names(temp_aggregate)[numcol], "_", weeksegment)
                    temp_aggregate = temp_aggregate[,c(which(colnames(temp_aggregate) == "filename"), numcol)]
                    # NOTE(review): as above, this loop overwrites LUX_segment_vars on every iteration,
                    # so only columns matching the last metric are counted -- confirm intent
                    LUX_segment_vars = c()
                    for (li in 1:length(LUXmetrics)) {
                      LUX_segment_vars = grep(pattern = paste0("LUX_",LUXmetrics[li]),x = colnames(temp_aggregate), value=TRUE)
                    }
                    if (length(LUX_segment_vars) > 0 & length(LUX_segment_vars) < 24 & length(LUX_day_segments) > 0) {
                      temp_aggregate = add_missing_LUX(temp_aggregate, LUX_day_segments, weeksegment, LUXmetrics)
                    }
                    G = base::merge(G, temp_aggregate,
                                    by="filename", all.x=TRUE)
                  }
                  # remove helper columns (weights and day type) from the output
                  G = G[,-which(names(G) %in% c("len", "daytype", "len_WE", "len_WD"))]
                  return(G)
                }
                #---------------------------------------------
                # Calculate, weighted and plain mean of all variables
                # add column to define what are weekenddays and weekdays as needed for function agg_plainNweighted
                # before processing OF3, first identify which days have enough monitor wear time
                validdaysi = getValidDayIndices(OF3,includedaycrit.part5, window = uwi[j])
                if (length(validdaysi) >0) { # do not attempt to aggregate if there are no valid days
                  # aggregate OF3 (days) to person summaries in OF4
                  OF4 = agg_plainNweighted(OF3[validdaysi,],filename="filename",day="daytype")
                  # calculate additional variables
                  OF3tmp = OF3[,c("filename","night_number","daysleeper","cleaningcode","sleeplog_used","guider",
                                  "acc_available","nonwear_perc_day","nonwear_perc_spt","daytype","dur_day_min",
                                  "dur_spt_min")]
                  foo34 = function(df,aggPerIndividual,nameold,namenew,cval) {
                    # Helper to add a day-count variable to the person level summary.
                    # df: day level data (one row per day, across individuals), needs column 'filename'
                    # aggPerIndividual: person level aggregate, needs column 'filename'
                    # nameold: column in df in which values are compared with cval
                    # namenew: name of the count column that is added to aggPerIndividual
                    # cval: value that a day needs to have in column nameold to be counted
                    # Returns (invisibly) aggPerIndividual merged by filename with the new count column.
                    count_matches = function(values) length(which(values == cval)) # number of values meeting the criterion
                    counts_per_file = as.data.frame(
                      aggregate.data.frame(df, by = list(df$filename), FUN = count_matches),
                      stringsAsFactors = TRUE)
                    # keep only the file identifier and the count for the requested column
                    counts = data.frame(filename = counts_per_file$Group.1,
                                        cc = counts_per_file[, nameold],
                                        stringsAsFactors = TRUE)
                    merged = merge(aggPerIndividual, counts, by = "filename")
                    names(merged)[names(merged) == "cc"] = namenew
                    return(invisible(merged))
                  }
                  # # calculate number of valid days (both night and day criteria met)
                  OF3tmp$validdays = 0
                  # criteria is that nonwear percentage needs to be below threshold for both day and night:
                  OF3tmp$validdays[validdaysi] = 1
                  # now we have a label for the valid days, we can create a new variable
                  # in OF4 that is a count of the number of valid days:
                  OF4 = foo34(df=OF3tmp,aggPerIndividual=OF4,nameold="validdays",namenew="Nvaliddays",cval=1)
                  # do the same for WE (weekend days):
                  OF3tmp$validdays = 0
                  OF3tmp$validdays[validdaysi[which(OF3tmp$daytype[validdaysi] == "WE")]] = 1
                  OF4 = foo34(df=OF3tmp,aggPerIndividual=OF4,nameold="validdays",namenew="Nvaliddays_WE",cval=1)
                  # do the same for WD (weekdays):
                  OF3tmp$validdays = 0
                  OF3tmp$validdays[validdaysi[which(OF3tmp$daytype[validdaysi] == "WD")]] = 1
                  OF4 = foo34(df=OF3tmp,aggPerIndividual=OF4,nameold="validdays",namenew="Nvaliddays_WD",cval=1) # create variable from it
                  # do the same for daysleeper,cleaningcode, sleeplog_used, acc_available:
                  OF3tmp$validdays = 1
                  OF4 = foo34(df=OF3tmp[validdaysi,],aggPerIndividual=OF4,nameold="daysleeper",namenew="Ndaysleeper",cval=1)
                  OF4 = foo34(df=OF3tmp[validdaysi,],aggPerIndividual=OF4,nameold="cleaningcode",namenew="Ncleaningcodezero",cval=0)
                  for (ccode in 1:6) {
                    OF4 = foo34(df=OF3tmp[validdaysi,], aggPerIndividual=OF4, nameold="cleaningcode",
                                namenew=paste0("Ncleaningcode", ccode), cval=ccode)
                  }
                  OF4 = foo34(df=OF3tmp[validdaysi,],aggPerIndividual=OF4,nameold="sleeplog_used",namenew="Nsleeplog_used",cval=TRUE)
                  OF4 = foo34(df=OF3tmp[validdaysi,],aggPerIndividual=OF4,nameold="acc_available",namenew="Nacc_available",cval=1)
                  # Move valid day count variables to beginning of dataframe
                  OF4 = cbind(OF4[,1:5],OF4[,(ncol(OF4)-10):ncol(OF4)],OF4[,6:(ncol(OF4)-11)])
                  nom = names(OF4)
                  cut = which(nom == "sleeponset_ts" | nom == "wakeup_ts" | nom == "night_number"  | nom == "window_number"
                              | nom == "daysleeper" | nom == "cleaningcode" | nom == "acc_available"
                              | nom == "guider" | nom == "L5TIME" | nom == "M5TIME"
                              | nom == "L10TIME" | nom == "M10TIME" | nom == "acc_available" | nom == "daytype")
                  names(OF4)[which(names(OF4)=="weekday")] = "startday"
                  OF4 = OF4[,-cut]
                  for (col4 in 1:ncol(OF4)) {
                    navalues = which(is.na(OF4[,col4]) == TRUE)
                    if (length(navalues) > 0) {
                      OF4[navalues, col4] = ""
                    }
                  }
                  #Move Nvaliddays variables to the front of the spreadsheet
                  Nvaliddays_variables = grep(x = colnames(OF4), pattern = "Nvaliddays", value = FALSE)
                  Nvaliddays_variables = unique(c(which(colnames(OF4) =="Nvaliddays"),
                                                  which(colnames(OF4) =="Nvaliddays_WD"),
                                                  which(colnames(OF4) =="Nvaliddays_WE"), Nvaliddays_variables))
                  OF4 = OF4[,unique(c(1:4, Nvaliddays_variables, 5:ncol(OF4)))]
                  #-------------------------------------------------------------
                  # store all summaries in csv files
                  write.csv(OF4,paste(metadatadir,"/results/part5_personsummary_",
                                      uwi[j],"_L",uTRLi[h1],"M",uTRMi[h2],"V",uTRVi[h3],"_",usleepparam[h4],".csv",sep=""),row.names=FALSE)
                }
              }
            }
            
          }
        }
      }
    }
    rm(outputfinal, outputfinal2)
  }
}
