Deepanshu Udhwani
How to test components and API responses of a webpage
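The script below reads a CSV of train-route records, groups them into batches of ten, then uses aiohttp to fetch each route page and its page-rover API endpoint concurrently. A page passes if a handful of XPath selectors find the expected headings, and the API passes if its JSON payload contains a non-empty q_filter list; the verdict for every route is written to a second CSV.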

import asyncio
import csv
import json

import aiohttp
from bs4 import BeautifulSoup
from lxml import etree

async def validate_route_page():
    # Read the raw rows and group them into batches of 10 so the
    # pages can be checked concurrently without flooding the server.
    route_data = []
    route_temp = []
    with open('unprocessed_data.csv', 'r') as file:
        reader = csv.reader(file)
        i = 1
        for idx, row in enumerate(reader):
            route_temp.append(row[:6])  # keep only the first six columns
            if not (idx + 1) % 10:
                route_data.append(route_temp)
                route_temp = []
            print(i / 10000)  # crude progress counter
            i = i + 1
    if route_temp:  # don't lose the final, partially filled batch
        route_data.append(route_temp)

    with open('processed_data.csv', 'w+', newline='') as file2:
        writer = csv.writer(file2)
        writer.writerow(["serial_no", "url", "url_status", "page_rover_url", "page_rover_status"])
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
            for rd in route_data:
                # Fire off one batch of checks concurrently, then pause
                # between batches so the target server isn't hammered.
                tasks = [scrap_data(j, session) for j in rd]
                result_data = await asyncio.gather(*tasks)
                await asyncio.sleep(5)  # time.sleep() would block the event loop
                for data in result_data:
                    writer.writerow(data)
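scrap_data reads each row positionally: serial number, source, destination, page URL, and page-rover API URL. So unprocessed_data.csv presumably looks something like the sketch below (the station codes and domains are invented for illustration; only the column order is taken from the code):

1,NDLS,BCT,https://example.com/trains/ndls-to-bct,https://example.com/page-rover?src=NDLS&dest=BCT
2,NDLS,MAS,https://example.com/trains/ndls-to-mas,https://example.com/page-rover?src=NDLS&dest=MAS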

async def scrap_data(row, session):
    serial_no = row[0]
    src = row[1]
    dest = row[2]
    url = row[3]
    page_rover_url = row[4]

    # Fetch the route page itself.
    try:
        async with session.get(url, timeout=30) as res:
            if res.status != 200:
                return [serial_no, url, 'bad status', page_rover_url, '']
            data = await res.read()
    except Exception:
        print(src, dest, 'timeout')
        return [serial_no, url, 'Exception Found', page_rover_url, '']

    mainSoup = BeautifulSoup(data, 'html.parser')
    dom = etree.HTML(str(mainSoup))

    # Count the sections that should be present on a healthy route page.
    a1 = len(dom.xpath('//div[3]/h2'))      # popular trains from src to dest
    a2 = len(dom.xpath('//main/div[3]/p'))  # "N found from ..." results line
    a3 = len(dom.xpath('//div[4]/h2'))      # covid special trains from src to dest
    a4 = len(dom.xpath('//div[5]/h2'))      # other trains from src to dest

    url_status = a1 >= 1 or a2 >= 1 or a3 >= 1 or a4 >= 1

    ################################
    # Fetch the page-rover API response for the same route.
    try:
        async with session.get(page_rover_url + "&format=json", timeout=10) as res:
            if res.status != 200:
                return [serial_no, url, url_status, page_rover_url, 'bad status']
            data = await res.read()
    except Exception:
        print(src, dest, 'timeout')
        return [serial_no, url, url_status, page_rover_url, 'Exception Found']

    # The API response is healthy if data.q_filter is non-empty.
    try:
        response_info = json.loads(data)
        page_rover_status = len(response_info['data']['q_filter']) >= 1
    except Exception:
        page_rover_status = 'Exception Found'

    data = [serial_no, url, url_status, page_rover_url, page_rover_status]
    print(data)
    return data
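The API check only inspects one field, so the page-rover response presumably looks something like this (the train entries are invented; only the data.q_filter path is taken from the code):

{
  "data": {
    "q_filter": [
      {"train_no": "12951", "train_name": "Mumbai Rajdhani"},
      {"train_no": "12953", "train_name": "August Kranti"}
    ]
  }
}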

if __name__ == "__main__":
    asyncio.run(validate_route_page())
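Run the file directly (python validate_routes.py, or whatever you name it) and processed_data.csv will contain one row per route: the serial number, the page URL with a True/False flag for whether its sections rendered, and the API URL with a True/False flag for whether q_filter came back non-empty. Rows carrying 'Exception Found' or 'bad status' instead of a flag are the ones worth re-checking manually.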
