Here's the models.py file:
from django.db import models
from django.utils.text import slugify


class News(models.Model):
    title = models.CharField(max_length=120)
    datess = models.CharField(max_length=120)
    linkss = models.CharField(max_length=120)
    slug = models.SlugField(blank=True, null=True)

    class Meta:
        verbose_name_plural = "news"

    def save(self, *args, **kwargs):
        if not self.slug and self.title:
            self.slug = slugify(self.title)
        super().save(*args, **kwargs)

    def __str__(self):
        return f'{self.title}'

    def get_absolute_url(self):
        return f"/news/{self.slug}"
Here's the views.py file:
from django.shortcuts import render
from django.core.paginator import Paginator
from django.db.models import Q

from .models import News

# For the scraping part
import requests
from bs4 import BeautifulSoup


def news_list(request, *args, **kwargs):
    # Scraping part - START
    response = requests.get("http://www.iitg.ac.in/home/eventsall/events")
    soup = BeautifulSoup(response.content, "html.parser")
    cards = soup.find_all("div", attrs={"class": "newsarea"})

    iitg_title = []
    iitg_date = []
    iitg_link = []
    for card in cards[0:6]:
        iitg_date.append(card.find("div", attrs={"class": "ndate"}).text)
        iitg_title.append(card.find("div", attrs={"class": "ntitle"}).text.strip())
        iitg_link.append(card.find("div", attrs={"class": "ntitle"}).a['href'])
    # Scraping part - END

    # Storing the scraped data directly into the database from views.py - START
    for i in range(len(iitg_title)):
        News.objects.create(title=iitg_title[i], datess=iitg_date[i], linkss=iitg_link[i])
    # Storing the scraped data directly into the database from views.py - END

    queryset = News.objects.all()  # Getting all the objects from the database

    search_query = request.GET.get('q')
    if search_query:
        # Note: the News model has no 'description' field, so this second
        # lookup raises a FieldError when a search query is submitted.
        queryset = queryset.filter(
            Q(title__icontains=search_query) |
            Q(description__icontains=search_query)
        )

    paginator = Paginator(queryset, 5)  # Adding pagination
    page_number = request.GET.get('page')
    queryset = paginator.get_page(page_number)

    context = {
        'object_list': queryset
    }
    return render(request, 'news_list.html', context)
With the above code, the same News objects get created again every time I refresh the page. How do I solve this issue?
Top comments (1)
Each time the page loads, the view calls News.objects.create() for every scraped item, and nothing prevents duplicates from being inserted. For a web scraper, it's a good idea to deduplicate on a unique field and to add a cooldown so the scrape doesn't run on every request, with an off-switch for debugging.
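Here's a minimal sketch of that idea, assuming the same News model. get_or_create() and Django's cache framework are standard Django APIs; the SCRAPING_ENABLED setting, the helper names, and the cache key are hypothetical names I chose for the off-switch and cooldown.

from django.conf import settings
from django.core.cache import cache

from .models import News


def store_scraped_news(titles, dates, links):
    for title, date, link in zip(titles, dates, links):
        # get_or_create() looks up an existing row by the lookup kwargs
        # (here: title) and only inserts when no match is found, so
        # refreshing the page no longer duplicates rows.
        News.objects.get_or_create(
            title=title,
            defaults={'datess': date, 'linkss': link},
        )


def should_scrape():
    # Off-switch for debugging (hypothetical settings flag).
    if not getattr(settings, 'SCRAPING_ENABLED', True):
        return False
    # Cooldown: skip scraping if it already ran within the last hour.
    if cache.get('news_scrape_cooldown'):
        return False
    cache.set('news_scrape_cooldown', True, timeout=60 * 60)
    return True

In news_list(), wrap the whole scraping block in if should_scrape(): and replace the create() loop with store_scraped_news(iitg_title, iitg_date, iitg_link). Adding unique=True to the title (or linkss) field would also enforce the deduplication at the database level.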