% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shiny_related_functions.R
\name{StrelkaIDVCFFilesToZipFile}
\alias{StrelkaIDVCFFilesToZipFile}
\title{Create a zip file which contains ID (small insertion and deletion) catalog
and plot PDF from Strelka ID VCF files}
\usage{
StrelkaIDVCFFilesToZipFile(
  dir,
  zipfile,
  ref.genome,
  region = "unknown",
  names.of.VCFs = NULL,
  base.filename = "",
  flag.mismatches = 0,
  return.annotated.vcfs = FALSE,
  suppress.discarded.variants.warnings = TRUE
)
}
\arguments{
\item{dir}{Pathname of the directory which contains \strong{only} the Strelka
ID VCF files. Each Strelka ID VCF \strong{must} have a file extension
".vcf" (case insensitive) and share the \strong{same} \code{ref.genome} and
\code{region}.}

\item{zipfile}{Pathname of the zip file to be created.}

\item{ref.genome}{A \code{ref.genome} argument as described in
\code{\link{ICAMS}}.}

\item{region}{A character string designating a genomic region;
see \code{\link{as.catalog}} and \code{\link{ICAMS}}.}

\item{names.of.VCFs}{Optional. Character vector of names of the VCF files.
The order of names in \code{names.of.VCFs} should match the order of VCFs
listed in \code{dir}. If \code{NULL} (default), the names are derived from
the paths of the files in \code{dir}: everything up to and including the
last path separator (if any) is removed, and the remaining file names without
extensions (and the leading dot) are used as the names of the VCF files.}

\item{base.filename}{Optional. The base name of the CSV and PDF files to be
produced; the file names end in \code{catID.csv} and \code{catID.pdf}
respectively.}

\item{flag.mismatches}{Deprecated. If there are ID variants whose \code{REF}
does not match the extracted sequence from \code{ref.genome}, the function
will automatically discard these variants and an element
\code{discarded.variants} will appear in the return value. See
\code{\link{AnnotateIDVCF}} for more details.}

\item{return.annotated.vcfs}{Logical. Whether to return the annotated VCFs
with additional columns showing mutation class for each variant. Default is
FALSE.}

\item{suppress.discarded.variants.warnings}{Logical. Whether to suppress
warning messages showing information about the discarded variants. Default
is TRUE.}
}
\description{
Create an ID (small insertion and deletion) catalog from the Strelka ID VCFs
specified by \code{dir}, save the catalog as a CSV file, plot it to PDF and
generate a zip archive of all the output files.
}
\details{
This function calls \code{\link{StrelkaIDVCFFilesToCatalog}},
\code{\link{PlotCatalogToPdf}}, \code{\link{WriteCatalog}} and
\code{zip::zipr}.
}
\section{Value}{

A \strong{list} of elements:
\itemize{
\item \code{catalog}: The ID (small insertion and deletion) catalog with
attributes added. See \code{\link{as.catalog}} for more details.
\item \code{discarded.variants}: \strong{Non-NULL only if} there are variants
that were excluded from the analysis. See the added extra column
\code{discarded.reason} for more details.
\item \code{annotated.vcfs}:
\strong{Non-NULL only if} \code{return.annotated.vcfs} = TRUE. A list of
data frames which contain the original VCF's ID mutation rows with three
additional columns \code{seq.context.width}, \code{seq.context} and
\code{ID.class} added. The category assignment of each ID mutation in the VCF
can be obtained from the \code{ID.class} column.
}
}

\section{ID classification}{

See \url{https://github.com/steverozen/ICAMS/blob/master/data-raw/PCAWG7_indel_classification_2021_09_03.xlsx}
for additional information on ID (small insertion and deletion) mutation
classification.

See the documentation for \code{\link{Canonicalize1Del}} which first handles
deletions in homopolymers, then handles deletions in simple repeats with
longer repeat units (e.g. \code{CACACACA}, see
\code{\link{FindMaxRepeatDel}}), and if the deletion is not in a simple
repeat, looks for microhomology (see \code{\link{FindDelMH}}).

See the code for the unexported function \code{\link{CanonicalizeID}}
and the functions it calls for handling of insertions.
}

\section{Note}{

In ID (small insertion and deletion) catalogs, deletion repeat sizes range
from 0 to 5+, but for plotting and end-user documentation deletion repeat
sizes range from 1 to 6+.
}

\examples{
dir <- c(system.file("extdata/Strelka-ID-vcf",
                     package = "ICAMS"))
if (requireNamespace("BSgenome.Hsapiens.1000genomes.hs37d5", quietly = TRUE)) {
  catalogs <-
    StrelkaIDVCFFilesToZipFile(dir,
                               zipfile = file.path(tempdir(), "test.zip"),
                               ref.genome = "hg19",
                               region = "genome",
                               base.filename = "Strelka-ID")
  unlink(file.path(tempdir(), "test.zip"))}
}
