% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_sig_similarity.R
\name{get_sig_similarity}
\alias{get_sig_similarity}
\title{Calculate Similarity between Identified Signatures and Reference Signatures}
\usage{
get_sig_similarity(
  Signature,
  Ref = NULL,
  sig_db = c("SBS", "legacy", "DBS", "ID", "TSB", "SBS_Nik_lab", "RS_Nik_lab",
    "RS_BRCA560", "RS_USARC", "CNS_USARC", "CNS_TCGA", "CNS_TCGA176", "CNS_PCAWG176",
    "SBS_hg19", "SBS_hg38", "SBS_mm9", "SBS_mm10", "DBS_hg19", "DBS_hg38", "DBS_mm9",
    "DBS_mm10", "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ", "latest_SBS_GRCh37",
    "latest_DBS_GRCh37", "latest_ID_GRCh37", "latest_SBS_GRCh38", "latest_DBS_GRCh38",
    "latest_SBS_mm9", "latest_DBS_mm9", "latest_SBS_mm10", "latest_DBS_mm10",
    "latest_SBS_rn6", "latest_DBS_rn6", "latest_CN_GRCh37"),
  db_type = c("", "human-exome", "human-genome"),
  method = "cosine",
  normalize = c("row", "feature"),
  feature_setting = sigminer::CN.features,
  set_order = TRUE,
  pattern_to_rm = NULL,
  verbose = TRUE
)
}
\arguments{
\item{Signature}{a \code{Signature} object or a component-by-signature matrix/\code{data.frame}
(sum of each column is 1) or a normalized component-by-sample matrix/\code{data.frame}
(sum of each column is 1).
For more details, please see the examples.}

\item{Ref}{default is \code{NULL}, can be the same kind of object as \code{Signature}.}

\item{sig_db}{default 'SBS', it can be 'legacy' (for \href{https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt/}{COSMIC v2 'SBS'}),
'SBS', 'DBS', 'ID' and 'TSB' (for \href{https://cancer.sanger.ac.uk/cosmic/signatures/}{COSMIC v3.1 signatures})
for small scale mutations.
For more specific details, it can also be 'SBS_hg19', 'SBS_hg38',
'SBS_mm9', 'SBS_mm10', 'DBS_hg19', 'DBS_hg38', 'DBS_mm9', 'DBS_mm10' to use
COSMIC v3 reference signatures from Alexandrov, Ludmil B., et al. (2020) (reference #1).
In addition, it can be one of "SBS_Nik_lab_Organ", "RS_Nik_lab_Organ",
"SBS_Nik_lab", "RS_Nik_lab" to refer to reference signatures from
Degasperi, Andrea, et al. (2020) (reference #2);
"RS_BRCA560", "RS_USARC" to reference signatures from BRCA560 and USARC cohorts;
"CNS_USARC" (40 categories), "CNS_TCGA" (48 categories) to reference copy number signatures from USARC cohort and TCGA;
"CNS_TCGA176" (176 categories) and "CNS_PCAWG176" (176 categories) to reference copy number signatures from TCGA and PCAWG respectively.
\strong{UPDATE}, the latest version of the reference signatures can be automatically
downloaded and loaded from \url{https://cancer.sanger.ac.uk/signatures/downloads/}
when an option with \code{latest_} prefix is specified (e.g. "latest_SBS_GRCh37").
\strong{Note}: the signature profiles for different genome builds are basically the same.
And a specific database (e.g. 'SBS_mm10') contains fewer signatures than all COSMIC
signatures (because some signatures are not detected from Alexandrov, Ludmil B., et al. (2020)).
For all available options, check the parameter setting.}

\item{db_type}{only used when \code{sig_db} is enabled.
"" for keeping default, "human-exome" for transforming to exome frequency of component,
and "human-genome" for transforming to whole genome frequency of component.
Currently only works for 'SBS'.}

\item{method}{default is 'cosine' for cosine similarity.}

\item{normalize}{one of "row" and "feature". "row" is typically used
for common mutational signatures. "feature" is designed by the author for use when
the inputs are copy number signatures.}

\item{feature_setting}{a \code{data.frame} used for classification.
\strong{Only used when method is "Wang" ("W")}.
Default is \link{CN.features}. Users can also set custom input with "feature",
"min" and "max" columns available. Valid features can be printed by
\code{unique(CN.features$feature)}.}

\item{set_order}{if \code{TRUE}, order the return similarity matrix.}

\item{pattern_to_rm}{patterns for removing some features/components in similarity
calculation. A vector of component name is also accepted.
The remove operation will be done after normalization. Default is \code{NULL}.}

\item{verbose}{if \code{TRUE}, print extra info.}
}
\value{
a \code{list} containing similarities, aetiologies if available, best match and RSS.
}
\description{
The reference signatures can be either a \code{Signature} object specified by \code{Ref} argument
or known COSMIC signatures specified by \code{sig_db} argument.
Two COSMIC databases are used for comparisons - "legacy" which includes 30 signatures,
and "SBS" - which includes updated/refined 65 signatures. This function is modified
from \code{compareSignatures()} in \strong{maftools} package.
\strong{NOTE}: all reference signatures are generated from the gold standard tool:
SigProfiler.
}
\examples{
# Load mutational signature
load(system.file("extdata", "toy_mutational_signature.RData",
  package = "sigminer", mustWork = TRUE
))

s1 <- get_sig_similarity(sig2, Ref = sig2)
s1

s2 <- get_sig_similarity(sig2)
s2
s3 <- get_sig_similarity(sig2, sig_db = "SBS")
s3

# Set order for result similarity matrix
s4 <- get_sig_similarity(sig2, sig_db = "SBS", set_order = TRUE)
s4

## Remove some components
## in similarity calculation
s5 <- get_sig_similarity(sig2,
  Ref = sig2,
  pattern_to_rm = c("T[T>G]C", "T[T>G]G", "T[T>G]T")
)
s5

## Same to DBS and ID signatures
x1 <- get_sig_db("DBS_hg19")
x2 <- get_sig_db("DBS_hg38")
s6 <- get_sig_similarity(x1$db, x2$db)
s6
}
\references{
Alexandrov, Ludmil B., et al. "The repertoire of mutational signatures in human cancer." Nature 578.7793 (2020): 94-101.

Degasperi, Andrea, et al. "A practical framework and online tool for mutational signature analyses show intertissue variation and driver dependencies." Nature cancer 1.2 (2020): 249-263.

Steele, Christopher D., et al. "Undifferentiated sarcomas develop through distinct evolutionary pathways." Cancer Cell 35.3 (2019): 441-456.

Nik-Zainal, Serena, et al. "Landscape of somatic mutations in 560 breast cancer whole-genome sequences." Nature 534.7605 (2016): 47-54.

Steele, Christopher D., et al. "Signatures of copy number alterations in human cancer." Nature 606.7916 (2022): 984-991.
}
\author{
Shixiang Wang \href{mailto:w_shixiang@163.com}{w_shixiang@163.com}
}
