% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/filter_data.r
\name{filter.data}
\alias{filter.data}
\title{Basic Eddy Covariance Data Filtering}
\usage{
filter.data(data, quality.control = TRUE, filter.growseas = FALSE,
  filter.precip = FALSE, filter.vars = NULL, filter.vals.min,
  filter.vals.max, NA.as.invalid = TRUE, vars.qc = NULL,
  quality.ext = "_qc", good.quality = c(0, 1), missing.qc.as.bad = TRUE,
  GPP = "GPP", doy = "doy", year = "year", tGPP = 0.5, ws = 15,
  min.int = 5, precip = "precip", tprecip = 0.01, precip.hours = 24,
  records.per.hour = 2, filtered.data.to.NA = TRUE)
}
\arguments{
\item{data}{Data.frame or matrix containing all required input variables in 
half-hourly or hourly resolution, including year, month, and day information.}

\item{quality.control}{Should quality control be applied? Defaults to \code{TRUE}.}

\item{filter.growseas}{Should data be filtered for growing season? Defaults to \code{FALSE}.}

\item{filter.precip}{Should precipitation filtering be applied? Defaults to \code{FALSE}.}

\item{filter.vars}{Additional variables to be filtered. Vector of type character.}

\item{filter.vals.min}{Minimum values of the variables to be filtered. Numeric vector of 
the same length as \code{filter.vars}. Set to \code{NA} to be ignored.}

\item{filter.vals.max}{Maximum values of the variables to be filtered. Numeric vector of 
the same length as \code{filter.vars}. Set to \code{NA} to be ignored.}

\item{NA.as.invalid}{If \code{TRUE} (the default) missing data are filtered out (applies to all variables).}

\item{vars.qc}{Character vector indicating the variables for which the quality filter should 
be applied. Ignored if \code{quality.control = FALSE}.}

\item{quality.ext}{The extension to the variables' names that marks them as 
quality control variables. Ignored if \code{quality.control = FALSE}.}

\item{good.quality}{Which values indicate good quality (i.e. not to be filtered) 
in the quality control (qc) variables? Ignored if \code{quality.control = FALSE}.}

\item{missing.qc.as.bad}{If a quality control variable is \code{NA}, should the corresponding data point be
treated as bad quality? Defaults to \code{TRUE}. Ignored if \code{quality.control = FALSE}.}

\item{GPP}{Gross primary productivity (umol m-2 s-1). Ignored if \code{filter.growseas = FALSE}.}

\item{doy}{Day of year. Ignored if \code{filter.growseas = FALSE}.}

\item{year}{Year. Ignored if \code{filter.growseas = FALSE}.}

\item{tGPP}{GPP threshold (fraction of 95th percentile of the GPP time series).
Must be between 0 and 1. Ignored if \code{filter.growseas} is \code{FALSE}.}

\item{ws}{Window size used for GPP time series smoothing. 
Ignored if \code{filter.growseas = FALSE}.}

\item{min.int}{Minimum time interval in days for a given state of growing season.
Ignored if \code{filter.growseas = FALSE}.}

\item{precip}{Precipitation (mm time-1)}

\item{tprecip}{Precipitation threshold used to identify a precipitation event (mm). 
Ignored if \code{filter.precip = FALSE}.}

\item{precip.hours}{Number of hours removed following a precipitation event (h).
Ignored if \code{filter.precip = FALSE}.}

\item{records.per.hour}{Number of observations per hour. I.e. 2 for half-hourly data.}

\item{filtered.data.to.NA}{Logical. If \code{TRUE} (the default), all variables in the input
data.frame/matrix are set to \code{NA} for the time step where ANY of the
\code{filter.vars} were beyond their acceptable range (as
determined by \code{filter.vals.min} and \code{filter.vals.max}).
If \code{FALSE}, values are not filtered, and an additional column 'valid'
is added to the data.frame/matrix, indicating whether all values of a row
did (1) or did not (0) fulfill the filter criteria.}
}
\value{
If \code{filtered.data.to.NA = TRUE} (default), the input data.frame/matrix with 
        observations which did not fulfill the filter criteria set to \code{NA}. 
        If \code{filtered.data.to.NA = FALSE}, the input data.frame/matrix with an additional 
        column "valid", which indicates whether all the data of a time step fulfill the 
        filtering criteria (1) or not (0).
}
\description{
Filters time series of EC data for high-quality values and specified
             meteorological conditions.
}
\details{
This routine consists of two parts:

         1) Quality control: All variables included in \code{vars.qc} are filtered for 
            good quality data. For these variables a corresponding quality variable with 
            the same name as the variable plus the extension as specified in \code{quality.ext}
            must be provided. For time steps where the value of the quality indicator is not included
            in the argument \code{good.quality}, i.e. the quality is not considered as 'good', 
            the value of the corresponding variable is set to \code{NA}.
            
         2) Meteorological filtering. Under certain conditions (e.g. low ustar), the assumptions
            of the EC method are not fulfilled. Further, some data analyses require certain meteorological
            conditions, such as periods without rainfall, or active vegetation (growing season, daytime).
            The filter applied in this second step serves to exclude time periods that do not fulfill the criteria
            specified in the arguments. More specifically, time periods where one of the variables is higher
            or lower than the specified thresholds (\code{filter.vals.min} and \code{filter.vals.max})
            are set to \code{NA} for all variables. If a threshold is set to \code{NA}, it will be ignored.
}
\note{
The thresholds set with \code{filter.vals.min} and \code{filter.vals.max} filter all data
      that are smaller than ("<"), or greater than (">") the specified thresholds. That means
      if a variable has exactly the same value as the threshold, it will not be filtered. Likewise,
      \code{tprecip} filters all data that are greater than \code{tprecip}. 

      Variables considered of bad quality (as specified by the corresponding quality control variables)      
      will be set to \code{NA} by this routine. Data that do not fulfill the filtering criteria are set to
      \code{NA} if \code{filtered.data.to.NA = TRUE}. Note that with this option \emph{all} variables of the same
      time step are set to \code{NA}. Alternatively, if \code{filtered.data.to.NA = FALSE} data are not set to \code{NA},
      and a new column "valid" is added to the data.frame/matrix, indicating whether all values of a row
      did (1) or did not (0) fulfill the filter criteria.
}
\examples{
# Filter one month of data. The month lies within the growing season, so the
# growing season filter is switched off.
# With filtered.data.to.NA = TRUE, every value of a row is set to NA as soon
# as one of the filter variables is beyond its bounds.
DE_Tha_Jun_2014_2 <- filter.data(
  DE_Tha_Jun_2014,
  # quality control (switched off, so the qc settings below are ignored)
  quality.control = FALSE,
  vars.qc = c("Tair", "precip", "H", "LE"),
  quality.ext = "_qc",
  good.quality = c(0, 1),
  missing.qc.as.bad = TRUE,
  # growing season filter (switched off, so its settings are ignored)
  filter.growseas = FALSE,
  GPP = "GPP", doy = "doy", year = "year",
  tGPP = 0.5, ws = 15, min.int = 5,
  # precipitation filter
  filter.precip = TRUE,
  precip = "precip", tprecip = 0.1, precip.hours = 24,
  records.per.hour = 2,
  # additional variables with lower bounds only (upper bounds are NA)
  filter.vars = c("Tair", "PPFD", "ustar"),
  filter.vals.min = c(5, 200, 0.2),
  filter.vals.max = c(NA, NA, NA),
  NA.as.invalid = TRUE,
  # set rows that fail the filter criteria to NA
  filtered.data.to.NA = TRUE
)

# Same settings, but with filtered.data.to.NA = FALSE: values are kept and a
# column 'valid' is added instead.
DE_Tha_Jun_2014_3 <- filter.data(
  DE_Tha_Jun_2014,
  # quality control (switched off, so the qc settings below are ignored)
  quality.control = FALSE,
  vars.qc = c("Tair", "precip", "H", "LE"),
  quality.ext = "_qc",
  good.quality = c(0, 1),
  missing.qc.as.bad = TRUE,
  # growing season filter (switched off, so its settings are ignored)
  filter.growseas = FALSE,
  GPP = "GPP", doy = "doy", year = "year",
  tGPP = 0.5, ws = 15, min.int = 5,
  # precipitation filter
  filter.precip = TRUE,
  precip = "precip", tprecip = 0.1, precip.hours = 24,
  records.per.hour = 2,
  # additional variables with lower bounds only (upper bounds are NA)
  filter.vars = c("Tair", "PPFD", "ustar"),
  filter.vals.min = c(5, 200, 0.2),
  filter.vals.max = c(NA, NA, NA),
  NA.as.invalid = TRUE,
  # keep the values and flag the rows in column 'valid'
  filtered.data.to.NA = FALSE
)

# Note the additional column 'valid' in DE_Tha_Jun_2014_3.
# To blank out the time steps marked as filtered out (0 in column 'valid'),
# set the corresponding rows to NA:
DE_Tha_Jun_2014_3[DE_Tha_Jun_2014_3["valid"] == 0, ] <- NA
  
  
}
