% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv_to_parquet.R
\name{csv_to_parquet}
\alias{csv_to_parquet}
\title{Convert a csv file to parquet format}
\usage{
csv_to_parquet(
  path_to_csv,
  url_to_csv,
  csv_as_a_zip = FALSE,
  filename_in_zip,
  path_to_parquet,
  columns = "all",
  compression = "snappy",
  compression_level = NULL,
  partition = "no",
  encoding = "UTF-8",
  ...
)
}
\arguments{
\item{path_to_csv}{string that indicates the path to the csv file}

\item{url_to_csv}{string that indicates the URL of the csv file}

\item{csv_as_a_zip}{boolean that indicates if the csv is stored in a zip}

\item{filename_in_zip}{name of the csv file in the zip (useful if several csv files are included in the zip). Required if \code{csv_as_a_zip} is \code{TRUE}.}

\item{path_to_parquet}{string that indicates the path to the directory where the parquet file will be stored}

\item{columns}{character vector of columns to select from the input file (by default, all columns are selected).}

\item{compression}{compression algorithm. Default "snappy".}

\item{compression_level}{compression level. Meaning depends on compression algorithm.}

\item{partition}{string ("yes" or "no", the default) that indicates whether you want to create a partitioned parquet file.
If "yes", the \code{partitioning} argument must also be supplied (through \code{...}). In this case, a folder will be created for each distinct value (modality) of the variable given in \code{partitioning}.}

\item{encoding}{string that indicates the character encoding for the input file.}

\item{...}{additional format-specific arguments, see \href{https://arrow.apache.org/docs/r/reference/write_parquet.html}{arrow::write_parquet()}
and \href{https://arrow.apache.org/docs/r/reference/write_dataset.html}{arrow::write_dataset()} for more information.}
}
\value{
A parquet file, invisibly
}
\description{
This function converts a csv file to parquet format. \cr

Several input sources are supported:

\itemize{

\item{From a locally stored file. Argument \code{path_to_csv} must then be used;}
\item{From a URL. Argument \code{url_to_csv} must then be used.}

}

Two output formats are offered:

\itemize{

\item{Convert to a single parquet file. Argument \code{path_to_parquet} must then be used;}
\item{Convert to a partitioned parquet file. Additional arguments \code{partition} and \code{partitioning} must then be used.}

}
}
\note{
Be careful: if the zip size exceeds 4 GB, the function may truncate
the data (because \code{unzip()} won't work reliably in this case;
see \href{https://rdrr.io/r/utils/unzip.html}{here}).
In this case, it's advised to unzip your csv file by hand
(for example with \href{https://www.7-zip.org/}{7-Zip})
then use the function with the argument \code{path_to_csv}.
}
\examples{


# Conversion from a local csv file to a single parquet file :

csv_to_parquet(
  path_to_csv = parquetize_example("region_2022.csv"),
  path_to_parquet = tempdir()
)

# Conversion from a local csv file to a single parquet file, selecting only
# a few columns :

csv_to_parquet(
  path_to_csv = parquetize_example("region_2022.csv"),
  path_to_parquet = tempdir(),
  columns = c("REG","LIBELLE")
)

# Conversion from a local csv file to a partitioned parquet file :

csv_to_parquet(
  path_to_csv = parquetize_example("region_2022.csv"),
  path_to_parquet = tempdir(),
  partition = "yes",
  partitioning =  c("REG")
)

# Conversion from a URL and a csv file with "gzip" compression :

csv_to_parquet(
  url_to_csv =
  "https://github.com/sidsriv/Introduction-to-Data-Science-in-python/raw/master/census.csv",
  path_to_parquet = tempdir(),
  compression = "gzip",
  compression_level = 5
)

# Conversion from a URL and a zipped file :

csv_to_parquet(
  url_to_csv = "https://www.nomisweb.co.uk/output/census/2021/census2021-ts007.zip",
  csv_as_a_zip = TRUE,
  filename_in_zip = "census2021-ts007-ctry.csv",
  path_to_parquet = tempdir()
)
}
