DEV Community

Manish Kumar Shah
Manish Kumar Shah

Posted on

Django Same data objects get created every time!

Here's the models.py file

from django.db import models
from django.utils.text import slugify

class News(models.Model):
    title         =   models.CharField(max_length=120)
    datess        =   models.CharField(max_length=120)
    linkss        =   models.CharField(max_length=120)
    slug          =   models.SlugField(blank=True, null=True)

    def save(self, *args, **kwargs):
        if not self.slug and self.title:
            self.slug = slugify(self.title)
        super(News, self).save(*args, **kwargs)

    class Meta:
        verbose_name_plural = "news"

    def __str__(self):
        return f'{self.title}'

    def get_absolute_url(self):
        return f"/news/{self.slug}"
Enter fullscreen mode Exit fullscreen mode

Here's the views.py file

from django.shortcuts import render
from .models import News
from django.core.paginator import Paginator
from django.db.models import Q
# For scraping part
import requests
from bs4 import BeautifulSoup


def news_list(request, *args, **kwargs):
    # fOR scraping part - START::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    response = requests.get("http://www.iitg.ac.in/home/eventsall/events")
    soup = BeautifulSoup(response.content,"html.parser")
    cards = soup.find_all("div", attrs={"class": "newsarea"})

    iitg_title = []
    iitg_date = []
    iitg_link = []
    for card in cards[0:6]:
        iitg_date.append(card.find("div", attrs={"class": "ndate"}).text)
        iitg_title.append(card.find("div", attrs={"class": "ntitle"}).text.strip())
        iitg_link.append(card.find("div", attrs={"class": "ntitle"}).a['href'])
    # fOR scraping part - END::::::::::::::::::::::::::::::::::::::::::::::::::::::::

    # fOR storing the scraped data directly into the dtatbase from the views.py file - START---------------------------------------------------------------
    for i in range(len(iitg_title)):
        News.objects.create(title = iitg_title[i], datess = iitg_date[i], linkss = iitg_link[i])
    # fOR storing the scraped data directly into the dtatbase from the views.py file - END-----------------------------------------------------------------

    queryset = News.objects.all()   #Getting all the objects from the database

    search_query = request.GET.get('q')
    if search_query:
        queryset = queryset.filter(
            Q(title__icontains = search_query) |
            Q(description__icontains = search_query)
        )

    paginator = Paginator(queryset, 5)  #Adding pagination
    page_number = request.GET.get('page')
    queryset = paginator.get_page(page_number)

    context = {
       'object_list': queryset
    }

    return render(request, 'news_list.html', context)
Enter fullscreen mode Exit fullscreen mode

With this above code each time I refresh the webpage the same data objects get's created again and again, How to solve this issue?

Top comments (1)

Collapse
 
j_mplourde profile image
Jean-Michel Plourde

Each time the code is loaded, the create function is called but there is nothing to prevent duplicates. For a web scraper, it's a good idea to create a cool down feature with an off-switch for debugging.

 for i in range(len(iitg_title)):
        News.objects.create(title = iitg_title[i], datess = iitg_date[i], linkss = iitg_link[i])
Enter fullscreen mode Exit fullscreen mode