Part.1
What is the SageMaker Python SDK?
Part.2
Installation and Configuration
pip install sagemaker
import boto3
import sagemaker
from sagemaker.session import Session

# Configure AWS session
# SECURITY NOTE(review): never hard-code AWS credentials in source code or
# commit them to version control. Prefer the default credential chain —
# environment variables, a shared ~/.aws/credentials profile, or an IAM
# role — which boto3.Session() picks up automatically when no key
# arguments are given. The placeholders below are for illustration only.
session = boto3.Session(
    aws_access_key_id='YOUR_ACCESS_KEY',
    aws_secret_access_key='YOUR_SECRET_KEY',
    region_name='us-west-2')
# Create SageMaker session
# Wraps the boto3 session; later SageMaker calls (training, deployment)
# are routed through this object.
sagemaker_session = Session(boto_session=session)
Part.3
Train Your First Model
from sagemaker.xgboost import XGBoost

# S3 locations of the prepared CSV datasets.
train_path = 's3://your-bucket/train/train.csv'
val_path = 's3://your-bucket/validation/validation.csv'

# Hyperparameters are forwarded to train.py as command-line flags
# (e.g. --max_depth 5) by the SageMaker training toolkit.
xgb_hyperparameters = {
    'max_depth': 5,
    'eta': 0.2,
    'objective': 'binary:logistic',
}

# Framework estimator: runs train.py inside the managed XGBoost
# container on a single ml.m5.xlarge training instance.
xgb_estimator = XGBoost(
    entry_point='train.py',
    role='YOUR_IAM_ROLE',
    instance_type='ml.m5.xlarge',
    instance_count=1,
    framework_version='1.5-1',
    py_version='py3',
    hyperparameters=xgb_hyperparameters,
)

# Launch the training job; each dict key becomes an input channel
# mounted under /opt/ml/input/data/<name> in the container.
training_channels = {'train': train_path, 'validation': val_path}
xgb_estimator.fit(training_channels)
Part.4
Write the Training Script
# train.py
import argparse
import os
import pandas as pd
import xgboost as xgb
def parse_args():
    """Parse the hyperparameters SageMaker passes as CLI flags.

    The estimator's ``hyperparameters`` dict is forwarded to this script
    as ``--name value`` arguments. ``parse_known_args`` is used so that
    any extra arguments SageMaker adds are ignored instead of raising.

    Returns:
        argparse.Namespace with ``max_depth``, ``eta`` and ``objective``.
    """
    parser = argparse.ArgumentParser()
    # Defaults mirror the estimator configuration, so the script also
    # runs stand-alone (e.g. local debugging) with no flags at all;
    # previously missing flags came back as None and broke xgb.train.
    parser.add_argument('--max_depth', type=int, default=5)
    parser.add_argument('--eta', type=float, default=0.2)
    # The estimator passes 'objective' as a hyperparameter; accepting it
    # here lets that setting actually take effect instead of being
    # silently dropped by parse_known_args.
    parser.add_argument('--objective', type=str, default='binary:logistic')
    return parser.parse_known_args()[0]
if __name__ == '__main__':
    args = parse_args()

    # SageMaker mounts each input channel under /opt/ml/input/data/<name>
    # and exports its location as SM_CHANNEL_<NAME>; reading the env vars
    # (with the conventional paths as fallback) keeps the script working
    # both inside and outside a training container.
    train_dir = os.environ.get('SM_CHANNEL_TRAIN', '/opt/ml/input/data/train')
    validation_dir = os.environ.get(
        'SM_CHANNEL_VALIDATION', '/opt/ml/input/data/validation')
    model_dir = os.environ.get('SM_MODEL_DIR', '/opt/ml/model')

    # Read data
    train_data = pd.read_csv(os.path.join(train_dir, 'train.csv'))
    validation_data = pd.read_csv(os.path.join(validation_dir, 'validation.csv'))

    # Prepare DMatrix wrappers. Assumes 'target' is the label column in
    # both CSVs — TODO confirm against the data-preparation step.
    dtrain = xgb.DMatrix(train_data.drop('target', axis=1),
                         label=train_data['target'])
    dval = xgb.DMatrix(validation_data.drop('target', axis=1),
                       label=validation_data['target'])

    # Train model. getattr keeps compatibility with a parse_args() that
    # does not define --objective.
    params = {
        'max_depth': args.max_depth,
        'eta': args.eta,
        'objective': getattr(args, 'objective', 'binary:logistic'),
    }
    model = xgb.train(params, dtrain, evals=[(dval, 'validation')])

    # Save the model where SageMaker expects it; everything under the
    # model dir is packaged into model.tar.gz after the job finishes.
    model.save_model(os.path.join(model_dir, 'xgboost-model'))
Part.5
Deploy the Model as an Endpoint
# Deploy model
# Deploy the trained model behind a real-time HTTPS endpoint.
from sagemaker.serializers import CSVSerializer

predictor = xgb_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    # The model was trained on CSV data, so serialize requests as CSV;
    # without an explicit serializer the XGBoost predictor may encode a
    # raw numpy array in a format the container rejects.
    serializer=CSVSerializer())

# Make predictions
import numpy as np
test_data = np.random.rand(3, 4)  # Example data: 3 rows of 4 features
predictions = predictor.predict(test_data)
print("Predictions:", predictions)

# Endpoints bill for every hour they run — delete when finished:
# predictor.delete_endpoint()
Part.6
Using Built-in Algorithms
# Use the built-in Linear Learner algorithm via its container image.
# sagemaker.image_uris.retrieve is the SDK v2 replacement for the removed
# get_image_uri helper (the rest of this guide already uses v2 argument
# names such as instance_type/instance_count).
container = sagemaker.image_uris.retrieve(
    framework='linear-learner',
    region=sagemaker_session.boto_region_name)

linear_learner = sagemaker.estimator.Estimator(
    container,
    role='YOUR_IAM_ROLE',  # placeholder, same as the XGBoost example
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path='s3://your-bucket/output',
    sagemaker_session=sagemaker_session)

# Set hyperparameters: 10 input features, binary classification.
linear_learner.set_hyperparameters(
    feature_dim=10,
    predictor_type='binary_classifier',
    mini_batch_size=100)

# Start training. NOTE(review): the built-in linear-learner expects CSV
# or recordIO-protobuf input — confirm the channel's content type matches
# the data at train_path.
linear_learner.fit({'train': train_path})
Notes:
- Remember to clean up unused endpoints promptly to avoid extra costs.
- Select the appropriate instance type to balance cost and performance.
- Data format must meet the requirements; SageMaker is very picky!
Part.7
Using SageMaker Studio
# NOTE(review): `sagemaker.studio.Studio` is not part of the public
# SageMaker Python SDK — this import fails with the released `sagemaker`
# package. Studio notebooks are normally opened from the AWS console /
# Studio UI rather than programmatically. Verify this API exists in your
# environment before relying on this example.
from sagemaker.studio import Studio
# Create Studio session
studio = Studio()
# Open Notebook
studio.open_notebook('my_notebook.ipynb')