Deploy Models to Bedrock with BedrockModelBuilder#
This notebook shows four ways to deploy a model to Amazon Bedrock using BedrockModelBuilder:
From a TrainingJob — fetch a completed training job and deploy its artifacts
From a direct S3 path — point to model weights in S3 directly
From a ModelTrainer — deploy after training completes
From a ModelPackage — deploy a versioned model package from the SageMaker Model Registry
import boto3
import json
import time
from sagemaker.core.resources import TrainingJob
from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder
# Configuration — update these for your account.
# ROLE_ARN is passed as `role_arn` to the BedrockModelBuilder.deploy() calls and as
# `role` to the SFTTrainer in Option 3; replace both <...> placeholders before running.
ROLE_ARN = "arn:aws:iam::<ACCOUNT_ID>:role/<ROLE_NAME>"
# REGION is used for the TrainingJob lookup (Option 1) and the bedrock-runtime client.
REGION = "us-east-1"
Option 1: Deploy from a completed TrainingJob#
Fetch a completed training job and pass it directly to BedrockModelBuilder.
The builder resolves model artifacts automatically from model_artifacts.s3_model_artifacts.
# Name of the existing training job whose artifacts will be deployed.
TRAINING_JOB_NAME = "my-sft-nova-lite-job"

# Look the job up by name in the configured region.
training_job = TrainingJob.get(training_job_name=TRAINING_JOB_NAME, region=REGION)

# Show the job's status and the S3 location of its model artifacts.
print(f"Status: {training_job.training_job_status}")
print(f"Model artifacts: {training_job.model_artifacts.s3_model_artifacts}")

# Hand the TrainingJob object itself to the builder.
builder = BedrockModelBuilder(model=training_job)

# Nova models: supplying custom_model_name deploys via create_custom_model.
result = builder.deploy(custom_model_name="my-nova-lite-custom", role_arn=ROLE_ARN)
print(f"Deployment ARN: {result.get('customModelDeploymentArn')}")
Option 2: Deploy from a direct S3 path#
If you already have model weights in S3 (e.g., from a custom training pipeline or a downloaded model), pass the S3 URI string directly.
# S3 prefix holding the model weights; the builder takes the URI as a plain string.
S3_MODEL_PATH = "s3://my-bucket/models/nova-lite-sft-checkpoint/"
builder = BedrockModelBuilder(model=S3_MODEL_PATH)

# Nova path: supplying custom_model_name selects the create_custom_model API.
result = builder.deploy(custom_model_name="my-s3-nova-model", role_arn=ROLE_ARN)
print(f"Deployment ARN: {result.get('customModelDeploymentArn')}")
# OSS path: leaving out custom_model_name selects the create_model_import_job API.
builder_oss = BedrockModelBuilder(
    model="s3://my-bucket/models/llama-3-8b-finetuned/",
)
result = builder_oss.deploy(
    job_name="import-llama-finetuned",
    imported_model_name="llama-3-8b-finetuned",
    role_arn=ROLE_ARN,
)

# Report the ARN and status fields from the deploy() result.
print(f"Model ARN: {result.get('importedModelArn')}")
print(f"Status: {result.get('status')}")
Option 3: Deploy from a ModelTrainer (after training)#
After calling trainer.train(wait=True), the trainer’s _latest_training_job
holds a TrainingJob object with model_artifacts already resolved.
from sagemaker.train import SFTTrainer
from sagemaker.train.common import TrainingType
from sagemaker.core.training.configs import TrainingJobCompute
# Configure an SFTTrainer for LoRA training of nova-textgeneration-lite.
sft_trainer = SFTTrainer(
    model="nova-textgeneration-lite",
    training_type=TrainingType.LORA,
    training_dataset="s3://my-bucket/data/train.jsonl",
    compute=TrainingJobCompute(instance_type="ml.p5.48xlarge", instance_count=4),
    role=ROLE_ARN,
)

# Block until the training job has finished.
sft_trainer.train(wait=True)

# After training, _latest_training_job carries the resolved model_artifacts.
print(f"Checkpoint: {sft_trainer._latest_training_job.model_artifacts.s3_model_artifacts}")

# The builder accepts the trainer object itself as the model source.
builder = BedrockModelBuilder(model=sft_trainer)
result = builder.deploy(custom_model_name="my-trained-nova-model", role_arn=ROLE_ARN)
print(f"Deployment ARN: {result.get('customModelDeploymentArn')}")
Option 4: Deploy from a ModelPackage#
Pass a versioned ModelPackage from the SageMaker Model Registry for governed,
production deployments.
from sagemaker.core.resources import ModelPackage
from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder
# Fetch a specific model package version by its ARN.
# Replace the example ARN (region, account ID, package group, version) with your own.
model_package = ModelPackage.get(
    model_package_name="arn:aws:sagemaker:us-east-1:123456789012:model-package/my-models/3",
)

# Pass the ModelPackage directly to the builder.
bedrock_builder = BedrockModelBuilder(model=model_package)
result = bedrock_builder.deploy(
    custom_model_name="my-registry-model",
    role_arn=ROLE_ARN,
)
# Report the deployment ARN, as the other options do, so the outcome is visible.
print(f"Deployment ARN: {result.get('customModelDeploymentArn')}")
Provisioned Throughput (optional)#
After deploy() returns, the model is ready for on-demand inference.
For dedicated capacity, use create_provisioned_throughput():
# Optional: create provisioned throughput for dedicated capacity
# pt_result = builder.create_provisioned_throughput(
#     provisioned_model_name="my-provisioned-model",
#     model_units=1,
# )
# print(f"Provisioned throughput ARN: {pt_result.get('provisionedModelArn')}")
Test inference#
# Test inference against the deployed model
bedrock_runtime = boto3.client("bedrock-runtime", region_name=REGION)

# `result` is whatever the most recently executed deploy() call returned.
# Custom-model deployments expose 'customModelDeploymentArn'; the import path
# exposes 'importedModelArn'. Use whichever one is present.
model_arn = result.get("customModelDeploymentArn") or result.get("importedModelArn")
if not model_arn:
    # Fail here with a clear message instead of passing modelId=None to invoke_model.
    raise ValueError(
        "The deploy() result contains neither 'customModelDeploymentArn' nor "
        "'importedModelArn'; run one of the deployment options first."
    )

# NOTE(review): the request body schema is model-specific — confirm these fields
# match what the deployed model expects.
request_body = json.dumps({
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "max_tokens": 100,
    "temperature": 0.7,
})
response = bedrock_runtime.invoke_model(modelId=model_arn, body=request_body)

# The response body is a stream: read it, decode it, and parse the JSON payload.
output = json.loads(response["body"].read().decode())
print(output)