# Modified: 15 Dec 2016 SDH

summBg <- function(
  vol,
  setup,
  id.name = 'id',
  time.name = 'time',
  descrip.name = 'descrip',
  inoc.name = NULL,
  inoc.m.name = NULL,
  norm.name = NULL,
  norm.sd.name = NULL,
  vol.name = 'cvCH4',
  imethod = 'linear',
  extrap = FALSE,
  when = 30,
  show.obs = FALSE, 
  sort = TRUE) 
{

  # Argument checks~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  checkArgClassValue(vol, 'data.frame')
  checkArgClassValue(setup, 'data.frame')
  checkArgClassValue(id.name, 'character')
  checkArgClassValue(time.name, c('character', 'NULL'))
  checkArgClassValue(descrip.name, c('character', 'NULL'))
  checkArgClassValue(inoc.name, c('character', 'NULL'))
  checkArgClassValue(norm.name, c('character', 'NULL'))
  checkArgClassValue(inoc.m.name, c('character', 'NULL'))
  checkArgClassValue(vol.name, 'character')
  # Skip imethod, since it is checked in interp()
  checkArgClassValue(extrap, 'logical')
  checkArgClassValue(when, c('numeric', 'integer', 'character', 'NULL'))
  checkArgClassValue(show.obs, 'logical')
  checkArgClassValue(sort, 'logical')
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  # Echo response variable
  message('Response variable (volume) is ', deparse(substitute(vol)), '$', vol.name, '.')

  # Check for missing columns in vol
  if(class(when) %in% c('numeric', 'integer')) {
    if(any(missing.col <- !c(id.name, time.name, vol.name) %in% names(vol))){
      stop('Specified columns in vol data frame (', deparse(substitute(vol)), ') not found: ', c(id.name, time.name, vol.name)[missing.col], '.')
    } 
  } else { # when is 'end' or 'meas'
    if(any(missing.col <- !c(id.name, vol.name) %in% names(vol))){
      stop('Specified columns in vol data frame (', deparse(substitute(vol)), ') not found: ', c(id.name, vol.name)[missing.col], '.')
    } 
  }

  # Check for missing columns in setup
  if(any(missing.col <- !c(id.name, descrip.name) %in% names(setup))){
    stop('Specified columns in setup data frame (', deparse(substitute(setup)), ') not found: ', c(id.name, descrip.name)[missing.col], '.')
  } 

  # Check that inoc.name and norm.name can be found in setup data frame
  if(!is.null(inoc.name) && !inoc.name %in% setup[, descrip.name]) {
    stop('inoc.name ', deparse(substitute(inoc.name)), ' not found in ', deparse(substitute(setup)), '$', descrip.name, '.')
  }

  if(!is.null(norm.name) && !norm.name %in% names(setup)) {
    stop('norm.name ', deparse(substitute(norm.name)), ' not found in the column names of ', deparse(substitute(setup)), '.')
  }

  # And inoc.m.name
  if(!is.null(inoc.m.name) && !inoc.m.name %in% names(setup)) {
    stop('inoc.m.name ', deparse(substitute(inoc.m.name)), ' not found in the column names of ', deparse(substitute(setup)), '.')
  }

  # Problem if inoc.name is given but inoc.m.name is not
  if(!is.null(inoc.name) & is.null(inoc.m.name)) {
    stop('inoc.m.name must be provided in order to subtract inoculumn contribution.')
  }

  # Check for case when 'when' argument > all times
  if((is.numeric(when) | is.integer(when)) && all(when > vol[, time.name])) {
    stop('when argument (', when, ') is > all times in data.')

  }

  # Add other checks here

  # Trim setup based on ids and check again for inoc.name and norm.name~~~~~~~~~~~~~~~~~~~
  # Find reactor/bottle IDs present in both vol and setup
  ids <- intersect(setup[, id.name], vol[, id.name])

  setup <- setup[setup[, id.name] %in% ids, ]

  if(!is.null(inoc.name) && !inoc.name %in% setup[, descrip.name]) {
    stop('inoc.name ', deparse(substitute(inoc.name)), ' no longer in setup after trimming--are reactors present in setup missing in vol?')
  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  # Remove inoc ids
  if(!is.null(inoc.name)) {
    ids <- setup[setup[, descrip.name]!=inoc.name, id.name]
    ids.inoc <- setup[setup[, descrip.name]==inoc.name, id.name]
  }

  # Check for duplicates in setup and vol~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  if(any(duplicated(setup[, id.name]))) {
    stop('Duplicated reactor IDs (', id.name, ' column) in setup dataframe! This must be an error.')
  }

  if(any(duplicated(vol[, c(id.name, time.name)]))) {
    stop('Duplicated ID (', id.name, ' column) x time (', time.name, ' column) in vol dataframe! This must be an error.')
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  # Drop missing values from vol with a warning
  if(any(is.na(vol[, vol.name]))) {
    warning('Missing volume data in vol dataframe will be dropped.')
    print('here')
    vol <- vol[!is.na(vol[, vol.name]), ]
  }

  # Interpolate cvCH4 to common time for each reactor~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Or select values for analysis (when = 'end' or 'meas')

  if(class(when) %in% c('numeric', 'integer')) {
    summ1 <- expand.grid(id = ids, time = when)
    names(summ1) <- c(id.name, time.name)

    # Then interpolate
    for(i in ids) {
      dc <- vol[vol[, id.name]==i, ]
      # Interpolate if more than one value is present

      if(nrow(dc)>1) {
        summ1[summ1[, id.name]==i, vol.name] <- interp(dc[, time.name], dc[, vol.name], time.out = when, method = imethod, extrap = extrap)
      } else {
	if(dc[, time.name]==when) { # `when` argument matches the single time present
          summ1[summ1[, id.name]==i, vol.name] <- dc[, vol.name]
	} else {
          summ1[summ1[, id.name]==i, vol.name] <- NA
      	  warning('There is only a single ', vol.name, ' value for reactor ', i,', and it does not match the specified when (', when, '). Interpolation is not possible.')
	}
      }

    }

  } else if(length(when) == 1 && tolower(when) == 'end') { # User just wants to use latest values of volume

    summ1 <- data.frame(id = ids, time = NA, vol = NA)
    names(summ1) <- c(id.name, time.name, vol.name)

    # Sort, in order to find latest values
    vol <- vol[order(vol[, id.name], vol[, vol.name]), ]

    for(i in ids) {
      dc <- vol[vol[, id.name]==i, ]
      # Select the last row from sorted data frame
      summ1[summ1[, id.name]==i, c(time.name, vol.name)] <- dc[nrow(dc), c(time.name, vol.name)]
    }

  } else if(length(when) == 1 && when %in% c('meas', '1p')) { # Return values for all measurement times, which may differ among reactors

    summ1 <- vol[vol[, id.name] %in% ids, c(id.name, time.name, vol.name)]

  } else  {

    stop('when argument not recognized. Options are numeric or integer vector, \"end\" or \"meas\".')

  
  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  # Get all unique times
  times.summ <- unique(summ1[, time.name])

  # Work with inoculum data~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  # Now interpolate inoculum-only reactors to all unique times
  if(!is.null(inoc.name)) {

    summ.inoc <- expand.grid(id = ids.inoc, time = times.summ)

    # Then interpolate inoculum production (each inoc reactor) to each time
    for(i in ids.inoc) {

      dc <- vol[vol[, id.name]==i, ]

      # Interpolate if more than one value is present
      if(nrow(dc)>1) {
        summ.inoc[summ.inoc$id==i, vol.name] <- interp(dc[, time.name], dc[, vol.name], time.out = times.summ, method = imethod, extrap = extrap)
      } else {

	if(dc[, time.name]==times.summ) { # `when` argument matches the single time present
          summ.inoc[summ.inoc$id==i, vol.name] <- dc[, vol.name]
	} else {
          summ.inoc[summ.inoc$id==i, vol.name] <- NA
      	  warning('There is only a single ', vol.name, ' value for reactor ', i,', and it does not match the specified when (', when, '). Interpolation is not possible.')
	}

      }

    }

    # Check for NAs in inoculum data (probably extrapolation issue)
    if(any(is.na(summ.inoc[, vol.name]))) {
      warning('Missing values in inoculum-only volumes. Did the inoculum-only incubation end before other bottles or before \'when\'? Dropping observation(s). Try extrap = TRUE to retain (but be aware of what this means).')
      summ.inoc <- summ.inoc[!is.na(summ.inoc[, vol.name]), ]
    }

    # See if latest times have been dropped/are not available
    if(max(summ.inoc$time) < max(summ1[, time.name])) {
      warning('Times for the inoculum-only bottles do not extend as far as times for other bottles. See NaNs in output. Select a shorter time to avoid NaNs.')
    }

    # Merge to add mass inoculum and VS in substrate
    # Merge only necessary columns!
    summ.inoc <- merge(setup[, c(id.name, inoc.m.name)], summ.inoc, by.x = id.name, by.y = 'id')

    # Volume contribution per unit inoculum mass
    summ.inoc$vol.mi <- summ.inoc[, vol.name]/summ.inoc[, inoc.m.name]

    # Mean and sd volume contribution per unit inoc mass
    inoc.vol <- data.frame()

    for(i in times.summ) {
      vol.mi <- summ.inoc[summ.inoc$time == i, 'vol.mi']
      # Calculate sd only if there is more than one observation
      if(length(vol.mi) > 1) {
        ss <- sd(vol.mi)
      } else {
        ss <- 0
        warning('Only one inoculum-only bottle is present, so reported sd does not include variation within inoculum-only bottles.')
      }
      inoc.vol <- rbind(inoc.vol, c(time = i, mn = mean(vol.mi), s = ss))
    }

    names(inoc.vol) <- c(time.name, 'vol.mi.mn', 'vol.mi.sd')
    # inoc.vol has mean and sd vol per unit mass inoc for all times

  }
  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  # Samples
  # Add mass of inoculum and VS in substrate
  summ1 <- merge(setup, summ1, by = id.name)

  if(!is.null(inoc.name)) {

    # Merge inoculum normalized volumes with sample data
    summ1 <- merge(summ1, inoc.vol, by = time.name)

    # Calculate and substract inoc contribution
    # Next three lines only for returning additional info when show.obs = TRUE
    summ1[, paste0(vol.name, '.tot')] <- summ1[, vol.name]
    summ1[, paste0(vol.name, '.inoc')] <- summ1$vol.mi.mn*summ1[, inoc.m.name]
    summ1[, 'fv.inoc'] <- summ1[, paste0(vol.name, '.inoc')]/summ1[, paste0(vol.name, '.tot')]

    # Correct vol for inoculum
    summ1[, vol.name] <- summ1[, vol.name] - summ1$vol.mi.mn*summ1[, inoc.m.name]

    # Add sd in volume produced by inoculum for use below in error propagation
    summ1[, 'sd.inoc'] <- summ1$vol.mi.sd*summ1[, inoc.m.name]

  } else {

    # NTS: How did I handle this before 10 Feb 2016?
    summ1[, 'sd.inoc'] <- 0

  }

  # If selected, find times where rate drops below 1%/d of cumulative
  if(length(when) == 1 && when == '1p') { 

    # Find time when rvCH4 <= 1% of cvCH4
    s1times <- NULL
    summ1$rrvCH4 <- NA

    # Calculate relative rates
    for(i in ids) {
      dd <- summ1[summ1[, id.name] == i, ]
      dd <- dd[order(dd[, time.name]), ]
      rr <- c(NA, diff(dd[, vol.name])/diff(dd[, time.name]))/dd[, vol.name]
      # Add rates to summ1 only for exporting with show.obs = TRUE
      summ1[summ1[, id.name] == i, 'rrvCH4'] <- signif(100*rr, 4)
    }

    # Return observations here (early to avoid problem in next 2 blocks--see error messages)
    if(show.obs) {
      return(summ1)
    }

    # Back to working with rates (after show.obs option above)
    for(i in ids) {
      dd <- summ1[summ1[, id.name] == i, ]

      rr <- dd$rrvCH4/100
      tt <- dd[, time.name]

      # Find rates < 1%
      i1 <- which(rr <= 0.01)

      # That are consecutive
      i1d <- diff(i1)

      # That are uninterupted by a high rate
      if(any(i1d > 1)) {
        i2 <- max(which(i1d > 1)) + 1 
      } else {
        i2 <- 1
      }

      # Take first following time at least 3 d after obs preceeding first obs below 1% (this is correct!--think about production for first obs starting just after preceeding obs, so 3 d count should start then
      i3 <- i1[i2]
      i3 <- which(tt - tt[i3 - 1] >= 3)[1]

      if(!is.na(i3)) {
        ss <- dd[i3, ]
        s1times <- rbind(s1times, ss)
      } else {
        stop('You selected \"1p\" option for \"when\" argument but there are no observations that meet the criterion for id ', i, ' (and possibly others). Either use a fixed time for \"when\" or remove this id. Leave when = \"1p\" and set show.obs = TRUE to check rates for all bottles.')
        ##ss <- dd[nrow(dd), ]
        ##s1times <- rbind(s1times, ss)
      }

    }

    # Check for different times for bottles with same descrip
    summ1temp <- data.frame()

    for(i in unique(s1times[, descrip.name])) {
      tt <- max(s1times[s1times[, descrip.name] == i, time.name])

      for(j in unique(summ1[summ1[, descrip.name] == i, id.name])) {
        # Select times >= max time for this decrip.name level
        ss <- summ1[summ1[, id.name] == j & summ1[, time.name] >= tt, ]
        if(length(ss) == 0) stop('when = "1p" problem. Re-run function with show.obs = TRUE')
        ss <- ss[1, ]
        summ1temp <- rbind(summ1temp, ss)
      }

    }

    summ1 <- summ1temp

  } 

  # Normalization
  if(!is.null(norm.name)) { 

    # First calculate sd on normalized volume based on sd of VS
    # Nearly equivalent to calculate relative error in norm.name and apply it directly (i.e., 10% for norm.name = 10% for vol.name)
    if(!is.null(norm.sd.name)) {
      summ1[, paste0(vol.name,'.sd')] <- (summ1[, vol.name]/(summ1[, norm.name] - summ1[, norm.sd.name]) - 
                                          summ1[, vol.name]/(summ1[, norm.name] + summ1[, norm.sd.name]))/2
    } else {
      summ1[, paste0(vol.name,'.sd')] <- 0
    }

    # Normalize remaining vol by norm.name (typically by substrate VS)
    summ1[, vol.name] <- summ1[, vol.name]/summ1[, norm.name]

    # Normalize sd contribution from inoc by the same value
    summ1[, 'sd.inoc'] <- summ1[, 'sd.inoc']/summ1[, norm.name]

    # Next two lines only for returning additional info when show.obs = TRUE
    # Only have the .tot and .inoc columns when inoc is subtracted out
    if(!is.null(inoc.name) && inoc.name %in% setup[, descrip.name]) { 
      summ1[, paste0(vol.name, '.tot')] <- summ1[, paste0(vol.name, '.tot')]/summ1[, norm.name]
      summ1[, paste0(vol.name, '.inoc')] <- summ1[, paste0(vol.name, '.inoc')]/summ1[, norm.name]
    }
  } else {
      summ1[, paste0(vol.name,'.sd')] <- 0
  }

  # Calculate means and sd for a summary
  if(!show.obs) {
    # Summarize by description
    summ2 <- unique(summ1[, c(time.name, descrip.name)]) # NTS: may want to put time second

    for(i in unique(summ1[, descrip.name])){
      dd <- summ1[summ1[, descrip.name]==i, ]
      for(j in unique(dd[, time.name])) {
        ddd <- dd[dd[, time.name]==j, ]
        summ2[summ2[, descrip.name]==i & summ2[, time.name]==j, 'mean'] <- mean(na.omit(ddd[, vol.name]))
        summ2[summ2[, descrip.name]==i & summ2[, time.name]==j, 'sd'] <- sqrt(sd(na.omit(ddd[, vol.name]))^2 + 
                                                                              mean(ddd[, 'sd.inoc'])^2 + 
                                                                              mean(ddd[, paste0(vol.name,'.sd')])^2) 
        summ2[summ2[, descrip.name]==i & summ2[, time.name]==j, 'n'] <- sum(!is.na(ddd[, vol.name]))  
      }
    }
  } else { # If show.obs = TRUE, just return individual observations
    #summ1$sd.inoc <- NULL
    summ2 <- summ1[order(summ1[, descrip.name], summ1[, id.name], summ1[, time.name]), ]
  }


  # More messages~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  # Messages about inoculum 
  if(!is.null(inoc.name) && inoc.name %in% setup[, descrip.name]) { # Inoculum contribution subtracted
    #message('Inoculum contribution subtracted based on ', deparse(substitute(setup.orig)), '$', inoc.m.name, '.') 
    message('Inoculum contribution subtracted based on setup$', inoc.m.name, '.') 
  } else {
      message('Inoculum contribution not subtracted.') 
  }

  # Message about normalization
  if(!is.null(norm.name)) { 
    #message('Response normalized by ', deparse(substitute(setup)), '$', norm.name, '.')
    message('Response normalized by setup$', norm.name, '.')
  } else {
    message('No normalization by substrate mass.')
  }

  # Select columns
  if(!show.obs) {
    summ2 <- summ2[ , c(descrip.name, time.name, 'mean', 'sd', 'n')]
  } 

  # Sort result
  if(sort) {
    if(show.obs) {
      summ2 <- summ2[order(summ2[, descrip.name], summ2[, id.name], summ2[, time.name]), ]
    } else {
      summ2 <- summ2[order(summ2[, descrip.name], summ2[, time.name]), ]
    }
  } else {
    # Get original reactor order from setup
    descrip.order <- 1:length(unique(setup[, descrip.name]))
    names(descrip.order) <- setup[!duplicated(setup[, descrip.name]), descrip.name]

    # Sort
    summ2 <- summ2[order(descrip.order[as.character(summ2[, descrip.name])], summ2[, time.name]), ]
  }

  # Row names
  rownames(summ2) <- 1:nrow(summ2)

  return(summ2)

}
