Deploy Models to Bedrock with BedrockModelBuilder

This notebook shows four ways to deploy a model to Amazon Bedrock using BedrockModelBuilder:

  1. From a TrainingJob — fetch a completed training job and deploy its artifacts

  2. From a direct S3 path — point to model weights in S3 directly

  3. From a ModelTrainer — deploy after training completes

  4. From a ModelPackage — deploy a versioned model from the SageMaker Model Registry

import boto3
import json
import time
from sagemaker.core.resources import TrainingJob
from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder
# Account-specific settings — replace the placeholders before running.
# REGION is used for the TrainingJob lookup and the bedrock-runtime client.
REGION = "us-east-1"
# IAM role ARN handed to deploy() (role_arn) and to the trainer (role).
ROLE_ARN = "arn:aws:iam::<ACCOUNT_ID>:role/<ROLE_NAME>"

Option 1: Deploy from a completed TrainingJob

Fetch a completed training job and pass it directly to BedrockModelBuilder. The builder resolves model artifacts automatically from model_artifacts.s3_model_artifacts.

TRAINING_JOB_NAME = "my-sft-nova-lite-job"

# Look up the existing training job by name in the configured region.
training_job = TrainingJob.get(
    training_job_name=TRAINING_JOB_NAME,
    region=REGION,
)
print(f"Status: {training_job.training_job_status}")

# Fail fast with a clear message: only a completed job is expected to carry
# the model artifacts that the deploy step below relies on.
if training_job.training_job_status != "Completed":
    raise RuntimeError(
        f"Training job {TRAINING_JOB_NAME!r} is not completed "
        f"(status: {training_job.training_job_status}); "
        "wait for it to finish before deploying."
    )
print(f"Model artifacts: {training_job.model_artifacts.s3_model_artifacts}")
# Hand the fetched TrainingJob straight to the builder.
builder = BedrockModelBuilder(model=training_job)

# Supplying custom_model_name selects the Nova route (create_custom_model).
result = builder.deploy(custom_model_name="my-nova-lite-custom", role_arn=ROLE_ARN)
deployment_arn = result.get("customModelDeploymentArn")
print(f"Deployment ARN: {deployment_arn}")

Option 2: Deploy from a direct S3 path

If you already have model weights in S3 (e.g., from a custom training pipeline or a downloaded model), pass the S3 URI string directly.

# S3 prefix that already holds the model weights.
S3_MODEL_PATH = "s3://my-bucket/models/nova-lite-sft-checkpoint/"

# The builder accepts the S3 URI string as the model.
builder = BedrockModelBuilder(model=S3_MODEL_PATH)

# Nova route: custom_model_name makes deploy() use the create_custom_model API.
result = builder.deploy(custom_model_name="my-s3-nova-model", role_arn=ROLE_ARN)
deployment_arn = result.get("customModelDeploymentArn")
print(f"Deployment ARN: {deployment_arn}")
# OSS route: leaving out custom_model_name makes deploy() use the
# create_model_import_job API instead.
OSS_MODEL_PATH = "s3://my-bucket/models/llama-3-8b-finetuned/"
builder_oss = BedrockModelBuilder(model=OSS_MODEL_PATH)

result = builder_oss.deploy(
    role_arn=ROLE_ARN,
    job_name="import-llama-finetuned",
    imported_model_name="llama-3-8b-finetuned",
)

# Report the imported model's ARN and the status returned by the import call.
imported_model_arn = result.get("importedModelArn")
import_status = result.get("status")
print(f"Model ARN: {imported_model_arn}")
print(f"Status: {import_status}")

Option 3: Deploy from a ModelTrainer (after training)

After calling trainer.train(wait=True), the trainer’s _latest_training_job holds a TrainingJob object with model_artifacts already resolved.

from sagemaker.train import SFTTrainer
from sagemaker.train.common import TrainingType
from sagemaker.core.training.configs import TrainingJobCompute

# Compute resources for the fine-tuning job.
compute_config = TrainingJobCompute(instance_type="ml.p5.48xlarge", instance_count=4)

# LoRA supervised fine-tuning of Nova Lite on the JSONL dataset in S3.
sft_trainer = SFTTrainer(
    model="nova-textgeneration-lite",
    training_type=TrainingType.LORA,
    training_dataset="s3://my-bucket/data/train.jsonl",
    compute=compute_config,
    role=ROLE_ARN,
)

# Block until the training job has finished.
sft_trainer.train(wait=True)

# Once training is done, the trainer's latest job exposes the resolved artifacts.
latest_job = sft_trainer._latest_training_job
print(f"Checkpoint: {latest_job.model_artifacts.s3_model_artifacts}")
# The trainer object itself can serve as the model source for the builder.
builder = BedrockModelBuilder(model=sft_trainer)

result = builder.deploy(custom_model_name="my-trained-nova-model", role_arn=ROLE_ARN)
deployment_arn = result.get("customModelDeploymentArn")
print(f"Deployment ARN: {deployment_arn}")

Option 4: Deploy from a ModelPackage

Pass a versioned ModelPackage from the SageMaker Model Registry for governed, production deployments.

from sagemaker.core.resources import ModelPackage
from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder

# Fetch the versioned model package from the Model Registry. Replace the ARN
# with your own; its region should match REGION, which is passed explicitly
# here just as it is for the TrainingJob lookup.
model_package = ModelPackage.get(
    model_package_name="arn:aws:sagemaker:us-east-1:123456789012:model-package/my-models/3",
    region=REGION,
)

bedrock_builder = BedrockModelBuilder(model=model_package)
result = bedrock_builder.deploy(
    custom_model_name="my-registry-model",
    role_arn=ROLE_ARN,
)
# Report the deployment ARN, as the other custom_model_name deployments do.
print(f"Deployment ARN: {result.get('customModelDeploymentArn')}")

Provisioned Throughput (optional)

After deploy() returns, the model is ready for on-demand inference. For dedicated capacity, use create_provisioned_throughput():

# Optional: create provisioned throughput for dedicated capacity
# pt_result = builder.create_provisioned_throughput(
#     provisioned_model_name="my-provisioned-model",
#     model_units=1,
# )
# print(f"Provisioned throughput ARN: {pt_result.get('provisionedModelArn')}")

Test inference

# Test inference against the deployed model
bedrock_runtime = boto3.client("bedrock-runtime", region_name=REGION)

# `result` is whatever the most recently run deploy cell returned: the
# custom_model_name deployments expose customModelDeploymentArn, while the
# import-job (OSS) path exposes importedModelArn.
model_arn = result.get("customModelDeploymentArn") or result.get("importedModelArn")
if not model_arn:
    # Without this check, modelId=None would be passed to invoke_model and
    # fail there with a less direct error.
    raise ValueError(
        "No model ARN found in the deploy result; "
        "run one of the deploy cells above before testing inference."
    )

# NOTE(review): the request body schema is model-specific — confirm that this
# messages/max_tokens payload matches what the deployed model family expects.
response = bedrock_runtime.invoke_model(
    modelId=model_arn,
    body=json.dumps({
        "messages": [{"role": "user", "content": "What is the capital of France?"}],
        "max_tokens": 100,
        "temperature": 0.7,
    }),
)

# The response body is a stream; read it fully and parse the JSON payload.
output = json.loads(response["body"].read().decode())
print(output)