I wrote a script that, for each AWS Glue job, calculates the minimum, average, and maximum run time from the job's recent execution history and displays them alongside the Timeout value configured for that job.
The output looks like this:
name timeout min avg max
gluejob-test2 172800 36 41 44
gluejob-test2-01 172800 52 54 57
longgluejob-sample1 172800 2621 3071 3725
longgluejob-sample2 172800 20837 22760 27465
All times are in seconds.
Here is the source code, written in Python:
import sys
import boto3
def main():
    """Script entry point: report Glue job timings for one AWS profile.

    Reads the AWS profile name from the first command-line argument and
    passes it to ``list_jobs``.

    Raises:
        SystemExit: If no profile name was supplied on the command line.
    """
    # Fail with a readable usage message rather than a bare IndexError
    # when the script is started without arguments.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <aws-profile-name>")
    profile = sys.argv[1]
    list_jobs(profile)
def list_jobs(profile):
    """Print a timeout / min / avg / max table covering every Glue job.

    Args:
        profile: Name of the AWS credentials profile used to create the
            boto3 session.
    """
    glue_client = boto3.session.Session(profile_name=profile).client("glue")
    all_jobs = fetch_glue_job_list(glue_client)
    print("name timeout min avg max")
    for job in all_jobs:
        job_name = job["Name"]
        # The API reports Timeout in minutes; convert it to seconds so it
        # is comparable with the execution-time statistics.
        timeout_seconds = job["Timeout"] * 60
        stats = fetch_glue_job_history(glue_client, job_name)
        stats_columns = " ".join(f"{value!s:>6}" for value in stats)
        print(f"{job_name:<70} {timeout_seconds!s:>6} {stats_columns}")
def fetch_glue_job_list(glue_client):
    """Return every Glue job definition, following pagination to the end.

    Args:
        glue_client: Object exposing ``get_jobs`` (a boto3 Glue client in
            this script).

    Returns:
        A list with the entries of ``"Jobs"`` from every response page,
        in the order they were received.
    """
    collected = []
    page = glue_client.get_jobs()
    collected.extend(page["Jobs"])
    # A page without "NextToken" is the last one.
    while "NextToken" in page:
        page = glue_client.get_jobs(NextToken=page["NextToken"])
        collected.extend(page["Jobs"])
    return collected
def fetch_glue_job_history(glue_client, job_name, max_count=100):
    """Summarise the execution times of a Glue job's successful runs.

    Pages through ``get_job_runs`` and collects ``ExecutionTime`` from every
    run whose ``JobRunState`` is ``"SUCCEEDED"``, in the order the API
    returns them (presumably most recent first -- confirm against the API
    docs). Collection stops as soon as ``max_count`` values have been
    gathered or there are no more pages.

    Args:
        glue_client: Object exposing ``get_job_runs`` (a boto3 Glue client
            in this script).
        job_name: Name of the Glue job whose runs are inspected.
        max_count: Maximum number of successful runs to include in the
            statistics. Defaults to 100.

    Returns:
        ``[min, avg, max]`` of the collected execution times, with the
        average truncated to an integer. ``[0, 0, 0]`` when no successful
        run was found.
    """
    durations = []
    request = {"JobName": job_name}
    while len(durations) < max_count:
        page = glue_client.get_job_runs(**request)
        for run in page["JobRuns"]:
            if run["JobRunState"] != "SUCCEEDED":
                continue
            durations.append(run["ExecutionTime"])
            if len(durations) >= max_count:
                break
        # A page without "NextToken" is the last one.
        if "NextToken" not in page:
            break
        request["NextToken"] = page["NextToken"]

    if not durations:
        return [0, 0, 0]
    average = int(sum(durations) / len(durations))
    return [min(durations), average, max(durations)]
# Run only when executed as a script, so that importing this module does not
# immediately read sys.argv and start calling AWS.
if __name__ == "__main__":
    main()
Top comments (0)