DEV Community

Deepanshu Udhwani
Deepanshu Udhwani

Posted on

Checking properties in an API response using JSONPath asynchronously

`import asyncio
import aiohttp
import time
import csv
import json
from jsonpath_ng import jsonpath, parse


async def validate_route_page(
    input_path='/Users/deepanshuudhwani/PycharmProjects/pythonProject3/pythontest.csv',
    output_path='dict.csv',
    batch_size=20,
    pause_seconds=1,
):
    """Check every route listed in a CSV file and write the results to a CSV file.

    The first two columns of each input row are handed to ``scrap_data``.
    Rows are processed in batches: all requests of a batch run concurrently
    through one shared ``aiohttp.ClientSession``, their result rows are
    written out, and the coroutine then pauses before the next batch.

    Args:
        input_path: CSV file to read the rows from.
        output_path: CSV file to (over)write with one result row per input row.
        batch_size: Number of rows requested concurrently per batch.
        pause_seconds: Delay between two consecutive batches.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    # newline='' is what the csv module expects for both reading and writing;
    # without it the writer emits blank lines between rows on Windows.
    with open(input_path, 'r', newline='') as source:
        # Blank lines come back as empty rows; they have no first column to
        # request, so they are dropped here.
        rows = [row[:2] for row in csv.reader(source) if row]

    batches = [rows[start:start + batch_size] for start in range(0, len(rows), batch_size)]

    # Result rows lead with the requested URL, so the header does as well.
    header = ['url', 'meta_title', 'meta_description', 'meta_keywords', 'meta_breadcrumb_title', 'q_filter']

    with open(output_path, 'w', newline='') as sink:
        writer = csv.writer(sink)
        writer.writerow(header)
        async with aiohttp.ClientSession() as session:
            last_index = len(batches) - 1
            for index, batch in enumerate(batches):
                results = await asyncio.gather(*(scrap_data(row, session) for row in batch))
                writer.writerows(results)
                if index < last_index:
                    # asyncio.sleep yields to the event loop; time.sleep would
                    # freeze every coroutine running on it.
                    await asyncio.sleep(pause_seconds)


async def scrap_data(row, session):
    """Request one route and report which properties its JSON response contains.

    Args:
        row: Sequence whose first element is the address to request, without
            the ``https://`` scheme (it is prepended here).
        session: Object providing ``get(url)`` as an async context manager
            whose result exposes ``status`` and an awaitable ``read()``.

    Returns:
        list: One output row, always starting with the URL:

        * ``[url, 'error']`` when the request raises.
        * ``[url, 'timeout']`` when the response status is not 200.
        * ``[url, 'invalid_json']`` when the body is not valid JSON.
        * ``[url, <five '*_not_found' markers>]`` when the body is empty.
        * ``[url, meta_title, meta_description, meta_keywords,
          meta_breadcrumb_title, q_filter]`` otherwise, where each flag is
          ``True`` if the JSONPath matched. A missing match is ``False``,
          except for ``meta_breadcrumb_title`` where it is ``''``.
    """
    url = row[0]
    try:
        async with session.get('https://' + url) as res:
            if res.status != 200:
                # The 'timeout' label covers every non-200 status; it is kept
                # unchanged so previously written result files stay comparable.
                return [url, 'timeout']
            body = await res.read()
    except Exception as exc:
        # Best-effort boundary: one unreachable route must not abort the
        # whole batch, so the failure is logged and reported as a row.
        print(url, 'error', repr(exc))
        return [url, 'error']

    # The emptiness check has to come before parsing: json.loads raises on an
    # empty body, so testing afterwards can never see it.
    if not body:
        return [url, *_NOT_FOUND_MARKERS]

    try:
        response_info = json.loads(body)
    except ValueError:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return [url, 'invalid_json']

    data = [
        url,
        _has_match('$..seo_meta.meta_title', response_info),
        _has_match('$..seo_meta.meta_description', response_info),
        _has_match('$..seo_meta.meta_keywords', response_info),
        # This column has always reported a missing match as '' rather than
        # False; `or ''` keeps that output.
        _has_match('$..seo_meta.breadcrumb[2].title', response_info) or '',
        _has_match('$..q_filter[0].heading', response_info),
    ]
    print(data)
    return data


# Markers reported for a response with an empty body, in column order.
_NOT_FOUND_MARKERS = (
    'meta_title_not_found',
    'meta_description_not_found',
    'meta_keywords_not_found',
    'meta_breadcrumb_title_not_found',
    'q_filter_not_found',
)

# Parsed JSONPath expressions keyed by their source string, so each constant
# path is parsed once instead of once per URL.
_PATH_CACHE = {}


def _compiled_path(path):
    """Return the parsed JSONPath expression for *path*, parsing it on first use."""
    expression = _PATH_CACHE.get(path)
    if expression is None:
        expression = _PATH_CACHE[path] = parse(path)
    return expression


def _has_match(path, document):
    """Return True/False for whether *path* matches in *document*, or '' on failure."""
    try:
        return bool(_compiled_path(path).find(document))
    except Exception:
        # Best-effort: an evaluation failure is reported as an empty cell
        # instead of aborting the row.
        return ''


# Module-level entry point: placed after the function body's `return`, this
# guard could never execute.
if __name__ == "__main__":
    asyncio.run(validate_route_page())

`
Enter fullscreen mode Exit fullscreen mode

Top comments (0)