AWS states that Graviton (ARM) is 30% faster than x86; however, I found that FFT on Graviton is about 10% slower than on x86. Below are the setup and results; possibly I have not set up numpy correctly for Graviton. In any case, the multi-threaded version is about 4x faster than the single-threaded one; in this case the 10240 MB Lambda has 6 vCPUs, I think.
Lambda configuration
- memory 10240 MB
- timeout 90 seconds
- deploy via ecr image and CDK
- EC2 ARM64 to build the image for the Graviton
- language python
- numpy 1.22.1 and numpy.fft.fft
- np.fft.fft(np.random.randint(0, 1000, (4098, 600)))
I deploy lambda by ecr image
Docker file
# base image: the public ECR Lambda image for Python 3.8
FROM public.ecr.aws/lambda/python:3.8
# create a "source" directory under the Lambda task root inside the container
RUN mkdir ${LAMBDA_TASK_ROOT}/source
# copy the whole build context into that directory
# (presumably this is where requirements.txt comes from - it is read from ./source below)
COPY . ${LAMBDA_TASK_ROOT}/source
# copy the handler module to the task root itself, next to the installed packages
COPY ./handler.py ${LAMBDA_TASK_ROOT}
# install the runtime dependencies into the task root
# NOTE(review): the relative ./source path assumes the image's working directory
# is ${LAMBDA_TASK_ROOT} - confirm against the base image
RUN pip3 install -r ./source/requirements.txt --target "${LAMBDA_TASK_ROOT}"
# set the CMD to the handler: function "lambda_handler" in module "handler"
CMD [ "handler.lambda_handler"]
lambda handler
import json
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
def single_thread_fft(sig):
    """
    Run an FFT over every array in ``sig`` sequentially and time the loop.

    Args:
        sig: iterable of array-likes. Each element is transformed with
            ``np.fft.fft(x, axis=0)``; the transform results are discarded.

    Returns:
        Elapsed time of the whole loop in seconds, as a float. The same
        duration is also printed in milliseconds.
    """
    # Function-scope import so the block does not depend on the file header.
    import time

    # perf_counter() is monotonic and high resolution, so the measured
    # interval cannot be skewed (or go negative) when the system clock is
    # adjusted, unlike differences of datetime.now() timestamps.
    start_time = time.perf_counter()
    for x in sig:
        np.fft.fft(x, axis=0)
    delta_time = time.perf_counter() - start_time

    print("single thread running time {0} ms".format(delta_time * 1000))
    return delta_time
def multi_thread_fft(sig):
    """
    Run an FFT over every array in ``sig`` on a 4-worker thread pool and
    time the whole batch.

    Args:
        sig: iterable of array-likes. Each element is submitted to the pool
            as ``np.fft.fft(x, axis=0)``; the transform results are discarded.

    Returns:
        Elapsed time in seconds (float) from before the pool is created until
        all submitted tasks have finished. The same duration is also printed
        in milliseconds.

    Raises:
        Exception: whatever a worker raised while transforming an element
            (for example ``ValueError`` from ``np.fft.fft``) is re-raised here
            instead of being silently dropped with the discarded future.
    """
    # Function-scope import so the block does not depend on the file header.
    import time

    # perf_counter() is monotonic, so clock adjustments cannot skew the result.
    start_time = time.perf_counter()
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(np.fft.fft, x, axis=0) for x in sig]
    # Leaving the ``with`` block waits for every task, so the interval below
    # covers the complete batch.
    delta_time = time.perf_counter() - start_time

    # Surface worker failures; without this a failed FFT would still be
    # reported as a successful (and misleadingly fast) run.
    for future in futures:
        future.result()

    print("multi thread running time {0} ms".format(delta_time * 1000))
    return delta_time
def lambda_handler(event, context):
    """
    Lambda entry point: benchmark single-threaded vs. thread-pool FFT.

    Generates four random integer matrices of shape (4098, 600), times the
    FFT of all of them with ``single_thread_fft`` and ``multi_thread_fft``,
    and returns both timings.

    Args:
        event: invocation event; not used.
        context: invocation context; not used.

    Returns:
        A dict with ``statusCode`` 200, CORS headers, and a ``body`` holding
        a JSON object with the keys ``"single thread"`` and ``"multi thread"``,
        each mapped to the measured time in milliseconds.
    """
    # benchmark input: four independent random matrices
    sig = [np.random.randint(0, 1000, (4098, 600)) for _ in range(4)]
    # single thread
    single_thread_time = single_thread_fft(sig)
    # multi thread
    multi_thread_time = multi_thread_fft(sig)

    # A real dict, so the body is a proper JSON object. The previous
    # ``{"...".format(...)}`` was a set literal that only serialized because
    # ``default=str`` turned it into the repr of a Python set.
    timings_ms = {
        "single thread": single_thread_time * 1000,
        "multi thread": multi_thread_time * 1000,
    }
    # response
    return {
        'statusCode': 200,
        'headers': {
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Headers": "Content-Type",
            "Access-Control-Allow-Methods": "OPTIONS,GET"
        },
        'body': json.dumps(timings_ms, indent=4, sort_keys=True)
    }
CDK lambda api gateway stack
class LambdaFFTArm(Stack):
    """
    Stack with an ARM64 container-image Lambda function behind a REST API.

    The function code comes from the ECR repository "lambda-fft-arm-image";
    the function is configured with 10240 MB of memory and a 90 second
    timeout. The API endpoint URL is exported as the stack output "Url" and
    kept on ``self.url_output``.
    """

    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # look up the existing ECR repository by name and wrap it as Lambda code
        image_repository = aws_ecr.Repository.from_repository_name(
            self,
            id="LambdaFFTArmImage",
            repository_name="lambda-fft-arm-image",
        )
        image_code = aws_lambda.EcrImageCode.from_ecr_image(
            repository=image_repository,
        )

        # the function itself: handler and runtime are taken from the image
        fft_function = aws_lambda.Function(
            self,
            id="LambdaFFTArm",
            code=image_code,
            architecture=aws_lambda.Architecture.ARM_64,
            handler=aws_lambda.Handler.FROM_IMAGE,
            runtime=aws_lambda.Runtime.FROM_IMAGE,
            memory_size=10240,
            timeout=Duration.seconds(90),
        )

        # REST API in front of the function
        rest_api = aws_apigateway.LambdaRestApi(
            self,
            id="ApiLambdaFFTArm",
            handler=fft_function,
        )

        # expose the API endpoint as a stack output
        self.url_output = CfnOutput(self, "Url", value=rest_api.url)
Top comments (2)
thanks for the info
Thank you for the information. It helps me decide between the options for my projects.