% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/find_countrycol.R
\name{find_countrycol}
\alias{find_countrycol}
\title{Finds columns containing country names}
\usage{
find_countrycol(
  x,
  return_index = FALSE,
  allow_NA = TRUE,
  min_share = 0.8,
  sample_size = 1000
)
}
\arguments{
\item{x}{A data frame object}

\item{return_index}{A logical value indicating whether the function should return the index of country columns instead of the column names. Default is \code{FALSE}, column names are returned.}

\item{allow_NA}{Logical value indicating whether columns containing \code{NA} values are to be considered as country columns. Default is \code{allow_NA=TRUE}, the function will also return country columns containing \code{NA} values. Set to \code{FALSE} to exclude them.}

\item{min_share}{A value between \code{0} and \code{1} indicating the minimum share of country names in columns that are returned. A value of \code{0} will return any column containing a country name. A value of \code{1} will return only columns whose entries are all country names. Default is \code{0.8}, i.e. at least 80 percent of the column entries need to be country names.}

\item{sample_size}{Either \code{NA} or a numeric value indicating the sample size used for evaluating columns. Default is \code{1000}. If \code{NA} is passed, the function will evaluate the full table. The minimum accepted value is \code{100} (i.e. 100 randomly sampled rows are used to evaluate the columns). This parameter can be tuned to speed up computation on long datasets. Taking a sample could result in inexact identification of country columns; accuracy improves with larger samples.}
}
\value{
Returns a vector of column names (\code{return_index=FALSE}) or column indices (\code{return_index=TRUE}) of columns containing country names.
}
\description{
This function takes a data frame as argument and returns the column names (or indices) of all columns containing country names.
It can be used to automate the search of country columns in data frames.
For the purpose of this function, a country is any of the 249 territories designated in the ISO standard \code{3166}.
On large datasets a random sample is used for evaluating the columns.
}
\examples{
find_countrycol(x=data.frame(a=c("Brésil","Tonga","FRA"), b=c(1,2,3)))
}
\seealso{
\link[countries]{is_country}, \link[countries]{country_name}, \link[countries]{find_keycol}, \link[countries]{find_timecol}
}
