% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sagemaker_operations.R
\name{sagemaker_create_training_job}
\alias{sagemaker_create_training_job}
\title{Starts a model training job}
\usage{
sagemaker_create_training_job(TrainingJobName, HyperParameters,
  AlgorithmSpecification, RoleArn, InputDataConfig, OutputDataConfig,
  ResourceConfig, VpcConfig, StoppingCondition, Tags,
  EnableNetworkIsolation, EnableInterContainerTrafficEncryption)
}
\arguments{
\item{TrainingJobName}{[required] The name of the training job. The name must be unique within an AWS
Region in an AWS account.}

\item{HyperParameters}{Algorithm-specific parameters that influence the quality of the model.
You set hyperparameters before you start the learning process. For a
list of hyperparameters for each training algorithm provided by Amazon
SageMaker, see
\href{https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html}{Algorithms}.

You can specify a maximum of 100 hyperparameters. Each hyperparameter is
a key-value pair. Each key and value is limited to 256 characters, as
specified by the \code{Length Constraint}.}

\item{AlgorithmSpecification}{[required] The registry path of the Docker image that contains the training
algorithm and algorithm-specific metadata, including the input mode. For
more information about algorithms provided by Amazon SageMaker, see
\href{https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html}{Algorithms}.
For information about providing your own algorithms, see \href{https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html}{Using Your Own Algorithms with Amazon SageMaker}.}

\item{RoleArn}{[required] The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can
assume to perform tasks on your behalf.

During model training, Amazon SageMaker needs your permission to read
input data from an S3 bucket, download a Docker image that contains
training code, write model artifacts to an S3 bucket, write logs to
Amazon CloudWatch Logs, and publish metrics to Amazon CloudWatch. You
grant permissions for all of these tasks to an IAM role. For more
information, see \href{https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html}{Amazon SageMaker Roles}.

To be able to pass this role to Amazon SageMaker, the caller of this API
must have the \code{iam:PassRole} permission.}

\item{InputDataConfig}{An array of \code{Channel} objects. Each channel is a named input source.
\code{InputDataConfig} describes the input data and its location.

Algorithms can accept input data from one or more channels. For example,
an algorithm might have two channels of input data, \code{training_data} and
\code{validation_data}. The configuration for each channel provides the S3
location where the input data is stored. It also provides information
about the stored data: the MIME type, compression method, and whether
the data is wrapped in RecordIO format.

Depending on the input mode that the algorithm supports, Amazon
SageMaker either copies input data files from an S3 bucket to a local
directory in the Docker container, or makes them available as input
streams.}

\item{OutputDataConfig}{[required] Specifies the path to the S3 bucket where you want to store model
artifacts. Amazon SageMaker creates subfolders for the artifacts.}

\item{ResourceConfig}{[required] The resources, including the ML compute instances and ML storage
volumes, to use for model training.

ML storage volumes store model artifacts and incremental states.
Training algorithms might also use ML storage volumes for scratch space.
If you want Amazon SageMaker to use the ML storage volume to store the
training data, choose \code{File} as the \code{TrainingInputMode} in the algorithm
specification. For distributed training algorithms, specify an instance
count greater than 1.}

\item{VpcConfig}{A \code{VpcConfig} object that specifies the VPC that you want your training
job to connect to. Control access to and from your training container by
configuring the VPC. For more information, see \href{https://docs.aws.amazon.com/sagemaker/latest/dg/train-vpc.html}{Protect Training Jobs by Using an Amazon Virtual Private Cloud}.}

\item{StoppingCondition}{[required] Sets a duration for training. Use this parameter to cap model training
costs. To stop a job, Amazon SageMaker sends the algorithm the \code{SIGTERM}
signal, which delays job termination for 120 seconds. Algorithms might
use this 120-second window to save the model artifacts.

When Amazon SageMaker terminates a job because the stopping condition
has been met, training algorithms provided by Amazon SageMaker save the
intermediate results of the job. This intermediate data is a valid model
artifact. You can use it to create a model using the \code{CreateModel} API.}

\item{Tags}{An array of key-value pairs. For more information, see \href{https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/cost-alloc-tags.html#allocation-what}{Using Cost Allocation Tags}
in the \emph{AWS Billing and Cost Management User Guide}.}

\item{EnableNetworkIsolation}{Isolates the training container. No inbound or outbound network calls
can be made, except for calls between peers within a training cluster
for distributed training. If you enable network isolation for training
jobs that are configured to use a VPC, Amazon SageMaker downloads and
uploads customer data and model artifacts through the specified VPC, but
the training container does not have network access.

The Semantic Segmentation built-in algorithm does not support network
isolation.}

\item{EnableInterContainerTrafficEncryption}{To encrypt all communications between ML compute instances in
distributed training, choose \code{TRUE}. Encryption provides greater
security for distributed training, but training might take longer. How
long it takes depends on the amount of communication between compute
instances, especially if you use a deep learning algorithm in
distributed training. For more information, see \href{https://docs.aws.amazon.com/sagemaker/latest/dg/train-encrypt.html}{Protect Communications Between ML Compute Instances in a Distributed Training Job}.}
}
\description{
Starts a model training job. After training completes, Amazon SageMaker
saves the resulting model artifacts to an Amazon S3 location that you
specify.
}
\details{
If you choose to host your model using Amazon SageMaker hosting
services, you can use the resulting model artifacts as part of the
model. You can also use the artifacts in a machine learning service
other than Amazon SageMaker, provided that you know how to use them for
inferences.

In the request body, you provide the following:
\itemize{
\item \code{AlgorithmSpecification} - Identifies the training algorithm to use.
\item \code{HyperParameters} - Specify these algorithm-specific parameters to
influence the quality of the final model. For a list of
hyperparameters for each training algorithm provided by Amazon
SageMaker, see
\href{https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html}{Algorithms}.
\item \code{InputDataConfig} - Describes the training dataset and the Amazon S3
location where it is stored.
\item \code{OutputDataConfig} - Identifies the Amazon S3 location where you
want Amazon SageMaker to save the results of model training.
\item \code{ResourceConfig} - Identifies the resources, ML compute instances,
and ML storage volumes to deploy for model training. In distributed
training, you specify more than one instance.
\item \code{RoleArn} - The Amazon Resource Name (ARN) of the IAM role that
Amazon SageMaker assumes to perform tasks on your behalf during model
training. You must grant this role the necessary permissions so that
Amazon SageMaker can successfully complete model training.
\item \code{StoppingCondition} - Sets a duration for training. Use this
parameter to cap model training costs.
}

For more information about Amazon SageMaker, see \href{https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works.html}{How It Works}.
}
\section{Request syntax}{
\preformatted{svc$create_training_job(
  TrainingJobName = "string",
  HyperParameters = list(
    "string"
  ),
  AlgorithmSpecification = list(
    TrainingImage = "string",
    AlgorithmName = "string",
    TrainingInputMode = "Pipe"|"File",
    MetricDefinitions = list(
      list(
        Name = "string",
        Regex = "string"
      )
    )
  ),
  RoleArn = "string",
  InputDataConfig = list(
    list(
      ChannelName = "string",
      DataSource = list(
        S3DataSource = list(
          S3DataType = "ManifestFile"|"S3Prefix"|"AugmentedManifestFile",
          S3Uri = "string",
          S3DataDistributionType = "FullyReplicated"|"ShardedByS3Key",
          AttributeNames = list(
            "string"
          )
        )
      ),
      ContentType = "string",
      CompressionType = "None"|"Gzip",
      RecordWrapperType = "None"|"RecordIO",
      InputMode = "Pipe"|"File",
      ShuffleConfig = list(
        Seed = 123
      )
    )
  ),
  OutputDataConfig = list(
    KmsKeyId = "string",
    S3OutputPath = "string"
  ),
  ResourceConfig = list(
    InstanceType = "ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge",
    InstanceCount = 123,
    VolumeSizeInGB = 123,
    VolumeKmsKeyId = "string"
  ),
  VpcConfig = list(
    SecurityGroupIds = list(
      "string"
    ),
    Subnets = list(
      "string"
    )
  ),
  StoppingCondition = list(
    MaxRuntimeInSeconds = 123
  ),
  Tags = list(
    list(
      Key = "string",
      Value = "string"
    )
  ),
  EnableNetworkIsolation = TRUE|FALSE,
  EnableInterContainerTrafficEncryption = TRUE|FALSE
)
}
}

\keyword{internal}
