`import asyncio
import aiohttp
import time
import csv
import json
from jsonpath_ng import jsonpath, parse
async def validate_route_page(
    input_path='/Users/deepanshuudhwani/PycharmProjects/pythonProject3/pythontest.csv',
    output_path='dict.csv',
    batch_size=20,
    pause_seconds=1,
):
    """Check every route listed in a CSV file and write one result row per route.

    The first two columns of each non-blank input row are kept and the rows
    are processed in batches: every route of a batch is passed to
    ``scrap_data`` concurrently, the returned rows are written to the output
    CSV, and the coroutine pauses before starting the next batch.  No header
    row is written.

    Args:
        input_path: CSV file whose first column holds the URL (without the
            ``https://`` scheme) of each route to check.
        output_path: CSV file that receives the result rows; it is
            overwritten.
        batch_size: Number of routes requested concurrently.
        pause_seconds: Delay between two consecutive batches.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        OSError: If either file cannot be opened.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # newline='' is what the csv module expects for both reading and writing.
    # Blank lines are parsed as [] and carry no URL, so they are skipped.
    with open(input_path, 'r', newline='') as source:
        routes = [row[:2] for row in csv.reader(source) if row]

    batches = [
        routes[start:start + batch_size]
        for start in range(0, len(routes), batch_size)
    ]

    with open(output_path, 'w', newline='') as sink:
        writer = csv.writer(sink)
        async with aiohttp.ClientSession() as session:
            for batch_index, batch in enumerate(batches):
                if batch_index:
                    # asyncio.sleep yields to the event loop; time.sleep would
                    # freeze every coroutine running on it.
                    await asyncio.sleep(pause_seconds)
                results = await asyncio.gather(
                    *(scrap_data(route, session) for route in batch),
                    return_exceptions=True,
                )
                for route, result in zip(batch, results):
                    if isinstance(result, BaseException):
                        # One failing route must not abort the whole run.
                        result = [route[0], 'error']
                    writer.writerow(result)
# Compiled JSONPath expressions keyed by their source text, so the same
# expression is not re-parsed on every call.
_JSONPATH_CACHE = {}

# JSONPath probes in output-column order: meta_title, meta_description,
# meta_keywords, meta_breadcrumb_title, q_filter.
_SEO_FIELD_PATHS = (
    '$..seo_meta.meta_title',
    '$..seo_meta.meta_description',
    '$..seo_meta.meta_keywords',
    '$..seo_meta.breadcrumb[2].title',
    '$..q_filter[0].heading',
)


def _has_match(path, document):
    """Return True/False for whether *path* matches in *document*.

    Returns '' when the expression cannot be compiled or evaluated, so that a
    probe failure stays distinguishable from a missing field in the output.
    """
    try:
        expression = _JSONPATH_CACHE.get(path)
        if expression is None:
            expression = _JSONPATH_CACHE[path] = parse(path)
        return bool(expression.find(document))
    except Exception:
        # Best effort: one broken probe must not discard the whole row.
        return ''


async def scrap_data(row, session):
    """Fetch one route and report which SEO fields its JSON response contains.

    Args:
        row: Sequence whose first element is the URL without the
            ``https://`` scheme.
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with ``status`` and an awaitable ``read()``
            (an ``aiohttp.ClientSession`` in this script).

    Returns:
        A list that always starts with the URL, followed by one of:

        * ``'timeout'`` - the response status was not 200 (label kept from
          the original output format; it covers every non-200 status);
        * ``'error'`` - the request raised, or the body is not valid JSON;
        * five ``'<field>_not_found'`` markers - the body was empty;
        * five flags, one per probe in ``_SEO_FIELD_PATHS``: ``True`` if the
          path matched, ``False`` if not, ``''`` if the probe itself failed.
    """
    url = row[0]
    try:
        async with session.get('https://' + url) as res:
            if res.status != 200:
                return [url, 'timeout']
            body = await res.read()
    except Exception as exc:
        print(url, 'error', repr(exc))
        return [url, 'error']

    # Test for an empty body before decoding: json.loads raises on b''.
    if not body:
        return [url, 'meta_title_not_found', 'meta_description_not_found', 'meta_keywords_not_found',
                'meta_breadcrumb_title_not_found', 'q_filter_not_found']

    try:
        response_info = json.loads(body)
    except ValueError as exc:
        # Covers json.JSONDecodeError and UnicodeDecodeError alike.
        print(url, 'error', repr(exc))
        return [url, 'error']

    data = [url] + [_has_match(path, response_info) for path in _SEO_FIELD_PATHS]
    print(data)
    return data
if __name__ == "__main__":
    # Start the crawl only when the file is executed as a script, not on import.
    asyncio.run(validate_route_page())
`
For further actions, you may consider blocking this person and/or reporting abuse
Top comments (0)