% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/machinelearning_operations.R
\name{machinelearning_create_data_source_from_redshift}
\alias{machinelearning_create_data_source_from_redshift}
\title{Creates a DataSource from a database hosted on an Amazon Redshift
cluster}
\usage{
machinelearning_create_data_source_from_redshift(DataSourceId,
  DataSourceName, DataSpec, RoleARN, ComputeStatistics)
}
\arguments{
\item{DataSourceId}{[required] A user-supplied ID that uniquely identifies the \code{DataSource}.}

\item{DataSourceName}{A user-supplied name or description of the \code{DataSource}.}

\item{DataSpec}{[required] The data specification of an Amazon Redshift \code{DataSource}:
\itemize{
\item DatabaseInformation -
\itemize{
\item \code{DatabaseName} - The name of the Amazon Redshift database.
\item \code{ClusterIdentifier} - The unique ID for the Amazon Redshift
cluster.
}
\item DatabaseCredentials - The AWS Identity and Access Management (IAM)
credentials that are used to connect to the Amazon Redshift
database.
\item SelectSqlQuery - The query that is used to retrieve the observation
data for the \code{DataSource}.
\item S3StagingLocation - The Amazon Simple Storage Service (Amazon S3)
location for staging Amazon Redshift data. The data retrieved from
Amazon Redshift using the \code{SelectSqlQuery} query is stored in this
location.
\item DataSchemaUri - The Amazon S3 location of the \code{DataSchema}.
\item DataSchema - A JSON string representing the schema. This is not
required if \code{DataSchemaUri} is specified.
\item DataRearrangement - A JSON string that represents the splitting and
rearrangement requirements for the \code{DataSource}.

Sample -
\code{"\{\"splitting\":\{\"percentBegin\":10,\"percentEnd\":60\}\}"}
}}

\item{RoleARN}{[required] A fully specified role Amazon Resource Name (ARN). Amazon ML assumes the
role on behalf of the user to create the following:
\itemize{
\item A security group to allow Amazon ML to execute the \code{SelectSqlQuery}
query on an Amazon Redshift cluster
\item An Amazon S3 bucket policy to grant Amazon ML read/write permissions
on the \code{S3StagingLocation}
}}

\item{ComputeStatistics}{The compute statistics for a \code{DataSource}. The statistics are generated
from the observation data referenced by a \code{DataSource}. Amazon ML uses
the statistics internally during \code{MLModel} training. This parameter must
be set to \code{TRUE} if the \code{DataSource} needs to be used for \code{MLModel}
training.}
}
\description{
Creates a \code{DataSource} from a database hosted on an Amazon Redshift
cluster. A \code{DataSource} references data that can be used to perform
either \code{CreateMLModel}, \code{CreateEvaluation}, or \code{CreateBatchPrediction}
operations.
}
\details{
\code{CreateDataSourceFromRedshift} is an asynchronous operation. In response
to \code{CreateDataSourceFromRedshift}, Amazon Machine Learning (Amazon ML)
immediately returns and sets the \code{DataSource} status to \code{PENDING}. After
the \code{DataSource} is created and ready for use, Amazon ML sets the
\code{Status} parameter to \code{COMPLETED}. A \code{DataSource} in the \code{COMPLETED} or
\code{PENDING} state can be used only to perform \code{CreateMLModel},
\code{CreateEvaluation}, or \code{CreateBatchPrediction} operations.

If Amazon ML can't accept the input source, it sets the \code{Status}
parameter to \code{FAILED} and includes an error message in the \code{Message}
attribute of the \code{GetDataSource} operation response.

The observations should be contained in the database hosted on an Amazon
Redshift cluster and should be specified by a \code{SelectSqlQuery} query.
Amazon ML executes an \code{Unload} command in Amazon Redshift to transfer
the result set of the \code{SelectSqlQuery} query to \code{S3StagingLocation}.

After the \code{DataSource} has been created, it's ready for use in
evaluations and batch predictions. If you plan to use the \code{DataSource}
to train an \code{MLModel}, the \code{DataSource} also requires a recipe. A recipe
describes how each input variable will be used in training an \code{MLModel}.
Will the variable be included or excluded from training? Will the
variable be manipulated; for example, will it be combined with another
variable or will it be split apart into word combinations? The recipe
provides answers to these questions.

You can't change an existing datasource, but you can copy and modify
the settings from an existing Amazon Redshift datasource to create a new
datasource. To do so, call \code{GetDataSource} for an existing datasource
and copy the values to a \code{CreateDataSourceFromRedshift} call. Change the settings
that you want to change and make sure that all required fields have the
appropriate values.
}
\section{Request syntax}{
\preformatted{svc$create_data_source_from_redshift(
  DataSourceId = "string",
  DataSourceName = "string",
  DataSpec = list(
    DatabaseInformation = list(
      DatabaseName = "string",
      ClusterIdentifier = "string"
    ),
    SelectSqlQuery = "string",
    DatabaseCredentials = list(
      Username = "string",
      Password = "string"
    ),
    S3StagingLocation = "string",
    DataRearrangement = "string",
    DataSchema = "string",
    DataSchemaUri = "string"
  ),
  RoleARN = "string",
  ComputeStatistics = TRUE|FALSE
)
}
}

\keyword{internal}
