DEV Community

Linusx
Linusx

Posted on

Enjoy this hell of a script I made: it is completely unreadable and will most likely break sometime, but right now it works perfectly

import csv
import bs4
import twill.commands as twill
from twill import browser
from login_stuff import secrets

# Address of the portal's timetable ("Stundenplan") page; get_html() requests
# it both before filling in the login form and again after submitting it.
timetable_url = 'https://ergogym.eltern-portal.org/service/stundenplan'


def get_html():
    """Sign in to the portal with twill and return the timetable page markup.

    The credentials are read from the ``secrets`` mapping under the keys
    ``'username'`` and ``'password'``.

    Returns:
        The value of ``browser.html`` after the second request to
        ``timetable_url``.
    """
    form = 1

    # The page served for the first request must carry the login fields that
    # are filled in below (presumably a redirect to the login form -- confirm
    # against the portal).
    twill.go(timetable_url)
    for credential in ('username', 'password'):
        twill.form_value(form, credential, secrets[credential])
    twill.submit(1)

    # Request the timetable again now that the form has been submitted.
    twill.go(timetable_url)
    return browser.html


# Runs at import time: performs the login and download through get_html() and
# parses the returned markup once into a module-level BeautifulSoup tree.
html = get_html()
soup = bs4.BeautifulSoup(html, 'html.parser')


def get_courses(html):
    """Return the course identifiers listed in the page's "Kurse" table.

    Args:
        html: Markup of the timetable page (e.g. the value returned by
            ``get_html()``).

    Returns:
        A list of strings. For every ``<td>`` of the first table whose text
        contains "Kurse", and whose own text contains a comma, the list holds
        the part of the cell text before the first comma.

    Raises:
        IndexError: If no table in ``html`` contains the text "Kurse".
    """
    # Parse the markup that was actually passed in rather than reading the
    # module-level ``soup``, so the function honours its own argument.
    page = bs4.BeautifulSoup(html, 'html.parser')

    course_table = [table for table in page.find_all('table') if 'Kurse' in table.text][0]
    return [cell.text.split(',')[0] for cell in course_table.find_all('td') if ',' in cell.text]



def parse_to_room_dict(field_string):
    """Map the '/'-separated identifiers of a cell to its '/'-separated rooms.

    The cell text is expected to hold the identifiers on its first line and
    the matching rooms on its second line, e.g. ``'1E/1M1\\n203/105'``.

    Args:
        field_string: Text of one timetable cell.

    Returns:
        A dict pairing each identifier with the room at the same position.
        Pairing stops at the shorter of the two lists. If the cell has no
        second line, every identifier maps to ``''``. If the cell is empty or
        whitespace-only, the result is ``{}``. Lines after the second one are
        ignored.
    """
    # Blank lines carry no information; dropping them keeps a cell with stray
    # line breaks from failing the way a strict two-way unpack would.
    lines = [line for line in field_string.strip().split('\n') if line.strip()]
    if not lines:
        return {}

    identifiers = lines[0].split('/')
    if len(lines) == 1:
        # No room line at all: keep the identifiers, with an empty room each.
        return dict.fromkeys(identifiers, '')

    rooms = lines[1].split('/')
    return dict(zip(identifiers, rooms))

def find_course_room(courses_dict, my_courses):
    """Return a label for the first course in ``courses_dict`` that is mine.

    Args:
        courses_dict: Mapping of course identifier to room, as produced by
            ``parse_to_room_dict``.
        my_courses: Iterable of the course identifiers to look for.

    Returns:
        ``None`` if no key of ``courses_dict`` is in ``my_courses``.
        Otherwise, for the first matching key in the dict's own order: the
        key itself when its room is ``''``, else the string
        ``'<key without its first character, lower-cased> - <room>'``.
    """
    wanted = set(my_courses)

    # Walk the dict in insertion order so that, should more than one course
    # match, the same one is picked on every run (popping from a set
    # intersection would pick an arbitrary one).
    for course, room in courses_dict.items():
        if course not in wanted:
            continue
        if room == '':
            return course
        return f'{course[1:].lower()} - {room}'

    return None

def get_timetable(html, courses=None):
    """Extract the weekly timetable from ``html``, reduced to ``courses``.

    Args:
        html: Markup of the timetable page.
        courses: Course identifiers to keep. When omitted (``None``) they are
            read from the same page via ``get_courses(html)``.

    Returns:
        A list of rows (lists of strings). The first row is the fixed header
        ``['', 'Montag', ..., 'Freitag']``. In every other row the first cell
        is the original cell text with newlines replaced by spaces, and each
        remaining cell is either the label returned by ``find_course_room``
        or ``''``. Rows whose cells after the first are all ``''`` are
        dropped.

    Raises:
        IndexError: If no table contains the text "Montag", or that table
            has no ``<tr>`` rows.
    """
    # Resolve the default at call time; evaluating get_courses() in the
    # signature would run it once, at definition time, against whatever the
    # global ``html`` held then.
    if courses is None:
        courses = get_courses(html)

    # Parse the markup that was passed in. The tree is private to this call,
    # so it can be edited in place without disturbing any shared state.
    page = bs4.BeautifulSoup(html, 'html.parser')
    timetable = [table for table in page.find_all('table') if 'Montag' in table.text][0]

    # Make every <br> show up as a newline in the extracted cell text.
    for br in timetable.find_all('br'):
        br.insert_before('\n')

    timetable_array = [[td.text for td in row.find_all('td')] for row in timetable.find_all('tr')]
    timetable_array[0] = ['', 'Montag', 'Dienstag', 'Mittwoch', 'Donnerstag', 'Freitag']

    for row in timetable_array[1:]:
        if not row:
            # A row without <td> cells has nothing to rewrite; the filter
            # below removes it.
            continue

        row[0] = row[0].replace('\n', ' ')

        for col_idx in range(1, len(row)):
            field = row[col_idx]
            # Only cells containing '/' are looked up; every other cell is
            # blanked.
            course_room = None
            if '/' in field:
                course_room = find_course_room(parse_to_room_dict(field), courses)
            row[col_idx] = course_room if course_room is not None else ''

    return [row for row in timetable_array if any(field != '' for field in row[1:])]
def save_to_file(timetable_array, courses):
    """Write the timetable and the course list to ``timetable.csv``.

    The file is created (or overwritten) in the current working directory.
    It contains the timetable rows, one empty row, and a final row
    ``['Kurse:', '<courses joined by ", ">']``.

    Args:
        timetable_array: Rows of the timetable (sequences of strings). The
            list is not modified.
        courses: Course identifiers (strings) for the trailing row.
    """
    # Build a new list instead of appending to the argument, so the caller's
    # timetable does not grow two extra rows as a side effect.
    rows = [*timetable_array, [], ['Kurse:', ', '.join(courses)]]

    # newline='' lets the csv module control line endings itself.
    with open('timetable.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerows(rows)

from reportlab.lib.pagesizes import A4, landscape
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics

def save_to_pdf(timetable_array, courses, width_cm=19, height_cm=11, output='timetable.pdf'):
    """Render the timetable as a table in a landscape A4 PDF.

    The table is followed by a "Kurse: ..." paragraph listing ``courses``.
    Text is set in the font registered as 'Inter', loaded from
    ``InterVariable.ttf``.

    Args:
        timetable_array: Rows of the timetable; the first row determines the
            number of columns.
        courses: Course identifiers (strings) for the paragraph.
        width_cm: Total table width in centimetres.
        height_cm: Total table height in centimetres, shared equally by all
            rows.
        output: Path of the PDF file to write.

    Raises:
        ValueError: If ``timetable_array`` is empty or its first row is
            empty, since no column or row size can be derived from it.
    """
    if not timetable_array or not timetable_array[0]:
        raise ValueError('timetable_array must contain at least one non-empty row')

    pdfmetrics.registerFont(TTFont('Inter', 'InterVariable.ttf'))
    pdfmetrics.registerFontFamily('Inter', normal='Inter')

    # Convert cm to points
    width = width_cm * cm
    height = height_cm * cm

    doc = SimpleDocTemplate(output, pagesize=landscape(A4))

    num_rows = len(timetable_array)
    num_cols = len(timetable_array[0])
    row_height = height / num_rows

    if num_cols == 1:
        # Nothing to share the width with; this also avoids dividing by
        # zero below.
        col_widths = [width]
    else:
        # The first column is 1.3 times an even share; the other columns
        # split what is left equally.
        first_col_width = (width / num_cols) * 1.3
        remaining_col_width = (width - first_col_width) / (num_cols - 1)
        col_widths = [first_col_width] + [remaining_col_width] * (num_cols - 1)

    # Define table with adjusted column widths and row heights
    table = Table(timetable_array,
                  colWidths=col_widths,
                  rowHeights=[row_height] * num_rows)

    # Centre every cell, use the registered font at 14 pt and draw a grid
    table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTNAME', (0, 0), (-1, -1), 'Inter'),  # The font registered above
        ('FONTSIZE', (0, 0), (-1, -1), 14),
        ('GRID', (0, 0), (-1, -1), 1, colors.black)  # Add gridlines
    ]))

    table.hAlign = 'LEFT'

    # The course list comes after the table in the build list
    course_text = f"Kurse: {', '.join(courses)}"
    course_paragraph = Paragraph(course_text, ParagraphStyle(name='CourseParagraph', fontName='Inter', fontSize=12, spaceBefore=cm))

    # Build the PDF
    doc.build([table, course_paragraph])


if __name__ == '__main__':
    # Produce two PDFs from the downloaded page: one for the courses listed
    # on the page itself and one for a fixed, hand-written course list.
    leopold_courses = ['1k3', '1E', '1M1', '1D1', '1ku1', '1pug4', '1ph2','1ew','1g3','1geo2','1b1','1smw3']
    courses = get_courses(html)

    jobs = (
        (courses, 'timetable.pdf'),
        (leopold_courses, 'timetable_leopold.pdf'),
    )
    for course_list, pdf_name in jobs:
        save_to_pdf(get_timetable(html, course_list), course_list, output=pdf_name)

Enter fullscreen mode Exit fullscreen mode

Top comments (0)

Image of Bright Data

Maintain Seamless Data Collection – No more rotating IPs or server bans.

Avoid detection with our dynamic IP solutions. Perfect for continuous data scraping without interruptions.

Avoid Detection