Trinity Past Paper downloader

This is a tool I made to download all the past papers for a module, because downloading each year's papers by hand is tedious and time-consuming. It is not perfect: I still need to implement searching for papers from 2012 and earlier. In the future, I will implement automatically grabbing all the modules in a given course and year. You can find the Python code below or download a compiled exe here.

import os
import re
import requests
from bs4 import BeautifulSoup
from datetime import datetime

def download(url: str, dest_folder: str, filename: str):
    """Download ``url`` and save it as ``filename`` inside ``dest_folder``.

    The destination folder is created when it does not exist. The response
    body is streamed to disk in 8 KiB chunks. The data is first written to a
    temporary ``<filename>.part`` file and only renamed to its final name
    once the whole body has been received, so an interrupted download never
    leaves a truncated file under the final name.

    On a non-success HTTP status nothing is written; the status code and the
    response text are printed instead.

    Args:
        url: Address to fetch.
        dest_folder: Directory the file is saved into.
        filename: Name of the file to create inside ``dest_folder``.
    """
    # exist_ok avoids the race between checking for the folder and creating it.
    os.makedirs(dest_folder, exist_ok=True)

    file_path = os.path.join(dest_folder, filename)
    part_path = file_path + ".part"

    # Using the response as a context manager releases the connection even
    # when the body is not consumed (error status) or writing fails part-way.
    with requests.get(url, stream=True) as r:
        if not r.ok:  # HTTP status code 4XX/5XX
            print("Download failed: status code {}\n{}".format(r.status_code, r.text))
            return

        print("saving to", os.path.abspath(file_path))
        try:
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 8):
                    if chunk:
                        f.write(chunk)
                # Flush and sync once at the end instead of after every chunk.
                f.flush()
                os.fsync(f.fileno())
            os.replace(part_path, file_path)
        finally:
            # After a successful rename the temporary file no longer exists;
            # it is only still present here when something went wrong.
            if os.path.exists(part_path):
                os.remove(part_path)

#module = input("Type module code")
# Root folder (relative to the working directory) that papers are saved under.
save_location = "Past Papers/"

# Log-in loop: obtain "user:password" credentials and probe the past-papers
# index with them until the server answers 200.
# NOTE(review): the credentials are embedded verbatim in the URL, so a user
# name or password containing characters such as "@", ":" or "/" will
# presumably break the request - confirm and percent-encode if needed.
auth = ""
code = 0
while code != 200:
    # The cached credentials are only tried on the first attempt (code == 0);
    # after any failed attempt the user is asked to type them in.
    auth_from_file = os.path.isfile("Trinity_password") and code == 0
    if auth_from_file:
        with open("Trinity_password") as password_file:
            # Drop a trailing line ending so a hand-edited file still works.
            auth = password_file.read().rstrip("\r\n")
    else:
        user = input("Enter your tcd username to continue: ")
        password = input("Enter your tcd password to continue: ")
        auth = user+":"+password

    # Only the status code is needed; closing the streamed response
    # releases the connection without downloading the page body.
    with requests.get("https://"+auth+"@www.tcd.ie/academicregistry/exams/past-papers/annual/",stream=True) as probe:
        code = probe.status_code
    print("----------------------")
    if 300 <= code <= 308:
        print("Site moved or redirected contact me to update this tool")
    elif code == 401:
        print("Unauthorized make sure you entered the right user and password")
    elif code == 403:
        print("Your user does not have permission to access these files")
    elif code == 418:
        print("Server can not make coffee as it is a teapot")
    elif code == 429:
        print("Your IP has sent too many requests please wait")
    elif 400 <= code <= 451:
        print("Bad request")
    elif 500 <= code < 600:
        print("Server error. Check if https://www.tcd.ie/ is online or contact tcd support.")
    if code != 200:
        print("")

print("Successfully connected")
print("----------------------")
# Persist credentials that were typed in during this run. This also replaces
# a cached file whose contents were rejected by the server, so the user is
# not prompted again on every run.
# NOTE: the credentials are stored in plain text in the working directory.
if not auth_from_file:
    print("Saving password")
    with open("Trinity_password", "w") as password_file:
        password_file.write(auth)
    print("----------------------")

# Module codes whose papers should be downloaded; filled in either here
# (CODE mode) or later, when a module name is resolved to a code.
modules = []
# Newest year to look at; the search walks backwards from the current year.
year = datetime.now().year
download_all = False
modulename = input("Input the name of the module you want to download (type CODE to input the module codes instead)\nType ALL to download all Trinity past papers:\n")#.upper()
if modulename == "CODE":
    modulename = ""
    # Read codes until the user submits an empty line. The empty line is only
    # a terminator and is not stored (indexing modules[-1] on the initially
    # empty list would raise IndexError).
    while True:
        entered_code = input("Input the module codes you want to download (press enter to finish)").strip()
        if entered_code == "":
            break
        modules.append(entered_code)
elif modulename == "ALL":
    modulename = ""
    download_all = True

# Walk backwards one year at a time, starting from `year`, and download the
# matching PDF links found on each year's index page.
while True:
    # Two-digit pair for the academic year, e.g. 2024 -> "2324".
    yearcode = str(year-1)[2:]+str(year)[2:]
    if year>2022:
        index_url="https://"+auth+"@www.tcd.ie/academicregistry/exams/past-papers/"+yearcode
    elif year == 2022:
        index_url="https://"+auth+"@www.tcd.ie/academicregistry/exams/past-papers/annual-2021-22"
    elif year>2012:
        index_url="https://"+auth+"@www.tcd.ie/academicregistry/exams/past-papers/annual-"+yearcode
    else:
        break  # TODO Use old site before 2012 https://www.tcd.ie/Local/Exam_Papers/
    print("--------------")
    print("Getting year",year)
    response = requests.get(index_url,stream=True)
    print("Response:",response.status_code)
    soup = BeautifulSoup(response.content,"html.parser")
    static_url = "https://"+auth+"@www.tcd.ie/academicregistry/exams"

    # If a code we already know appears on this page there is no need to
    # resolve the module name again for this year.
    found = any(soup.find(string=known_code) is not None for known_code in modules)
    if found:
        print("Previous module found")

    if modulename != "" and not found:
        # Try an exact text match first, then a case-insensitive search.
        # The name is escaped so it is matched literally, not as a regex.
        element = soup.find(string=modulename)
        if element is None:
            element = soup.find(string=re.compile(re.escape(modulename), re.IGNORECASE))
        if element is not None:
            # The code is taken from the next <td> after the name, falling
            # back to the next <a> when that cell has no single string.
            cell = element.find_next('td')
            modulecode = cell.string if cell is not None else None
            if modulecode is None:
                anchor = element.find_next('a')
                modulecode = anchor.string if anchor is not None else None
            # Only record a code that was actually found; a None entry
            # would make the "previous module" check above match anything.
            if modulecode is not None:
                print("Found code",modulecode,"for module name",element.string)
                modules.append(modulecode)

    for section in soup.find_all('a'):
        link = section.get('href')
        if link is None or not link.lower().endswith(".pdf"):
            continue
        # Unless everything is being downloaded, the link text must be one
        # of the wanted module codes.
        if not (download_all or section.text in modules):
            continue

        # Papers are grouped by module name when one was given, otherwise
        # by the link text.
        foldername = modulename if modulename != "" else section.text
        filename=link.split("/")[-1][:-4]+" "+str(year)+".pdf"
        if os.path.isfile(save_location+foldername+"/"+filename):
            print(filename,"already exists on disk.")
            continue
        try:
            # link[5:] drops the first five characters of the href before it
            # is appended to static_url - presumably a relative-path prefix;
            # TODO confirm against the live page markup.
            download(static_url+link[5:], save_location+foldername,filename)
        except (requests.RequestException, OSError) as error:
            # One failed paper should not stop the rest, but say what failed.
            print("Could not download",filename+":",error)
    year -= 1
input("All available papers downloaded into ~/Past Papers")

Comments