Backend applications have a lot of moving parts. Besides designing your domain models, you also have to think about what to expose and how to persist your data.
For applications that use relational datastores like MySQL or PostgreSQL, migrating the schema is a crucial part of a successful deployment.
I like to think of migrations as code that updates a database schema. In this post, supported by a GitHub project, you will learn one way to handle Aurora Serverless schema migrations using a custom resource.
Custom Resources
Custom resources enable you to write custom provisioning logic in templates that AWS CloudFormation runs anytime you create, update (if you change the custom resource), or delete the stack.
We can back a custom resource with a Lambda function that runs the migrations once the database has been provisioned.
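To make the handler shown later easier to follow, here is a rough sketch of the request CloudFormation sends to the provider function on a Create; all values below are placeholders, not taken from the project. The handler branches on RequestType and reports the outcome back through the cfnresponse helper.
# Illustrative Create request from CloudFormation (placeholder values only).
sample_create_event = {
    "RequestType": "Create",  # or "Update" / "Delete"
    "ResponseURL": "https://cloudformation-custom-resource-response-xxxx.s3.amazonaws.com/...",
    "StackId": "arn:aws:cloudformation:eu-west-1:111111111111:stack/aurora/...",
    "RequestId": "unique-request-id",
    "ResourceType": "AWS::CloudFormation::CustomResource",
    "LogicalResourceId": "Resource",
    "ResourceProperties": {"ServiceToken": "arn:aws:lambda:..."},  # plus any custom properties
}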
The next question that comes to mind is how to make the schema scripts available to the Lambda function without bundling them into its deployment package. This is where AWS Lambda layers come in.
A Lambda layer lets you package additional files or dependencies as an archive and use them in your functions without including them in the deployment package.
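As a quick illustration, a layer's content is extracted under /opt in the function's execution environment, so SQL scripts placed at the root of the layer archive show up as /opt/*.sql. A minimal sketch (the file name in the comment is hypothetical):
# Minimal sketch: discovering layer content at runtime inside the Lambda function.
import glob

migration_files = glob.glob("/opt/*.sql")
print(migration_files)  # e.g. ["/opt/001_create_tables.sql"] (hypothetical file name)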
Without further ado, let's see how to do this with CDK.
Creating the Migration Handler
The migration_handler.py module contains the function that is executed when the custom resource is created. It uses the RDS Data API to connect to the database cluster, reads the migration files from the Lambda layer, and runs them against the database.
import glob
import logging as log
import os

import boto3
import cfnresponse

log.getLogger().setLevel(log.INFO)


def main(event, context):
    SQL_PATH = "/opt"  # Layers are extracted to the /opt directory in the function execution environment.

    # This needs to change if there are to be multiple resources
    # in the same stack
    physical_id = "SchemaMigrationResource"

    # If this is a Delete event, do nothing. The schema will be destroyed along with the cluster.
    if event["RequestType"] == "Delete":
        cfnresponse.send(event, context, cfnresponse.SUCCESS, {"Response": "Deleted successfully"}, physical_id)
        return

    try:
        log.info("Input event: %s", event)
        sqlfiles = glob.glob(os.path.join(SQL_PATH, "*.sql"))
        log.info(sqlfiles)
        for file_path in sqlfiles:
            log.info(f"Found an SQL script in path: {file_path}")
            execute_sql_file(file_path)
            log.info("Ran migration successfully")
        attributes = {"Response": f"Ran migration successfully for these files: {sqlfiles}"}
        cfnresponse.send(event, context, cfnresponse.SUCCESS, attributes, physical_id)
    except Exception as e:
        log.exception(e)
        # cfnresponse's error message is always "see CloudWatch"
        cfnresponse.send(event, context, cfnresponse.FAILED, {}, physical_id)
        raise RuntimeError("Create failure requested")


def execute_statement(sql, sql_parameters=[], transaction_id=None):
    log.info(f"sql query: {sql}")
    client = boto3.client("rds-data")
    parameters = {
        "secretArn": os.getenv("DB_SECRET_ARN"),
        "database": os.getenv("DB_NAME"),
        "resourceArn": os.getenv("DB_CLUSTER_ARN"),
        "sql": sql,
        "parameters": sql_parameters,
    }
    if transaction_id is not None:
        parameters["transactionId"] = transaction_id
    try:
        response = client.execute_statement(**parameters)
        return response
    except client.exceptions.BadRequestException as e:
        log.exception(e)
        raise RuntimeError("Failed to execute SQL statement")


def execute_sql_file(file_path: str):
    log.info(f"executing file in: {file_path}")
    with open(file_path, "r") as script:
        script_content = script.read()
        queries = script_content.split(";")
        for query in queries:
            sql = query.strip()
            if sql:
                execute_statement(sql)
    log.info(f"executed the file: {file_path} successfully")
Creating the Custom Resource
The migration.py module contains the code that creates the Lambda function and the Lambda layer. We pass the function the environment variables it needs to connect to the database: the database cluster ARN, the secret ARN, and the database name.
from aws_cdk import (
    core,
    aws_lambda as _lambda,
    aws_cloudformation as cfn,
    aws_iam as _iam,
)

SQL_SCRIPTS_PATH = "scripts/schema"


class SchemaMigrationResource(core.Construct):
    def __init__(
        self,
        scope: core.Construct,
        id: str,
        secret_arn: str,
        db_name: str,
        db_ref: str,
        **kwargs,
    ):
        super().__init__(scope, id, **kwargs)

        with open("migration_handler.py", encoding="utf-8") as fp:
            code_body = fp.read()

        lambda_function = _lambda.SingletonFunction(
            self,
            "Singleton",
            uuid="f7d4f730-4ee1-11e8-9c2d-fa7ae01bbebc",
            code=_lambda.InlineCode(code_body),
            handler="index.main",
            timeout=core.Duration.seconds(300),
            runtime=_lambda.Runtime.PYTHON_3_7,
            layers=[
                _lambda.LayerVersion(
                    scope,
                    id="migrationscripts",
                    code=_lambda.Code.from_asset(SQL_SCRIPTS_PATH),
                    description="Database migration scripts",
                )
            ],
            environment={
                "DB_NAME": db_name,
                "DB_SECRET_ARN": secret_arn,
                "DB_CLUSTER_ARN": db_ref,
            },
        )

        # Allow the Lambda function to read the database secret
        lambda_function.add_to_role_policy(
            _iam.PolicyStatement(
                resources=[secret_arn],
                actions=["secretsmanager:GetSecretValue"],
            )
        )

        # Allow the Lambda function to execute queries against the database via the Data API
        lambda_function.add_to_role_policy(
            _iam.PolicyStatement(
                resources=[db_ref],
                actions=[
                    "rds-data:ExecuteStatement",
                    "rds-data:BatchExecuteStatement",
                ],
            )
        )

        # Allow the Lambda function to write its output to CloudWatch Logs
        lambda_function.add_to_role_policy(
            _iam.PolicyStatement(
                resources=["*"],
                actions=[
                    "logs:CreateLogGroup",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents",
                ],
            )
        )

        resource = cfn.CustomResource(
            self,
            "Resource",
            provider=cfn.CustomResourceProvider.lambda_(lambda_function),
            properties=kwargs,
        )

        self.response = resource.get_att("Response").to_string()
Creating the Serverless Aurora Cluster
With the custom resource defined, we can create the Serverless Aurora stack with this code:
import os

from aws_cdk import (
    aws_ec2 as ec2,
    aws_rds as rds,
    core,
    aws_secretsmanager as sm,
)

from .migration import SchemaMigrationResource


class RDSStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        vpc = ec2.Vpc(self, "VPC")

        db_master_user_name = os.getenv("DB_USERNAME", "admin_user")
        self.secret = rds.DatabaseSecret(
            self, id="MasterUserSecret", username=db_master_user_name
        )

        subnet_group = rds.CfnDBSubnetGroup(
            self,
            "rdsSubnetGroup",
            db_subnet_group_description="private subnets for rds",
            subnet_ids=vpc.select_subnets(
                subnet_type=ec2.SubnetType.PRIVATE
            ).subnet_ids,
        )

        db_name = os.getenv("DB_NAME", "anonfed")
        self.db = rds.CfnDBCluster(
            self,
            "auroraCluster",
            engine="aurora-mysql",
            engine_version="5.7.mysql_aurora.2.08.1",
            db_cluster_parameter_group_name="default.aurora-mysql5.7",
            # snapshot_identifier="<snapshot_arn>",  # your snapshot
            engine_mode="serverless",
            scaling_configuration=rds.CfnDBCluster.ScalingConfigurationProperty(
                auto_pause=True,
                min_capacity=1,
                max_capacity=4,
                seconds_until_auto_pause=300,
            ),
            db_subnet_group_name=subnet_group.ref,
            database_name=db_name,
            master_username=self.secret.secret_value_from_json("username").to_string(),
            master_user_password=self.secret.secret_value_from_json(
                "password"
            ).to_string(),
            enable_http_endpoint=True,
        )

        secret_attached = sm.CfnSecretTargetAttachment(
            self,
            id="secret_attachment",
            secret_id=self.secret.secret_arn,
            target_id=self.db.ref,
            target_type="AWS::RDS::DBCluster",
        )
        secret_attached.node.add_dependency(self.db)

        db_ref = f"arn:aws:rds:{self.region}:{self.account}:cluster:{self.db.ref}"
        migration = SchemaMigrationResource(
            self, "schemamigration", self.secret.secret_arn, db_name, db_ref
        )

        # Publish the custom resource output
        core.CfnOutput(
            self,
            "ResponseMessage",
            description="Database Migration",
            value=migration.response,
        )
Finally, we can hook everything together in app.py. The Api stack referenced below is part of the full project linked at the end of this post:
import os

from aws_cdk import core

# RDSStack and the Api stack are imported from the project's stack modules.

env_EU = core.Environment(
    account=os.environ.get("CDK_DEPLOY_ACCOUNT", os.environ["CDK_DEFAULT_ACCOUNT"]),
    region=os.environ.get("CDK_DEPLOY_REGION", os.environ["CDK_DEFAULT_REGION"]),
)

app = core.App()
db = RDSStack(scope=app, id="aurora", env=env_EU)
api_stack = Api(scope=app, id="api", env=env_EU, db=db)
app.synth()
Conclusion
In this post, we've seen how to run database schema migrations with a CloudFormation custom resource. To build on this knowledge, I encourage you to take a look at the full project in this repository.