Trinity Past Paper downloader

This is a tool I made to download all the past papers for a module, because downloading the papers for every year by hand is tedious and time-consuming. It is not perfect: I still need to implement searching for papers from 2012 and earlier. In the future I will implement automatically grabbing all the modules in a given course and year. You can find the Python code below or download a compiled exe here.
import os
import re
from datetime import datetime
from getpass import getpass
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
def download(url: str, dest_folder: str, filename: str, timeout: float = 60):
    """Download ``url`` and store it as ``dest_folder/filename``.

    The destination folder is created when missing. The body is streamed to a
    temporary ``<filename>.part`` file and only renamed to its final name once
    every byte has been written and synced, so an interrupted transfer never
    leaves a truncated file under the final name.

    Args:
        url: Address of the file to fetch.
        dest_folder: Directory the file is saved into.
        filename: Name of the file inside ``dest_folder``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. On an HTTP error status the failure is printed and nothing is
        written to disk.

    Raises:
        requests.RequestException: On connection problems or timeouts.
        OSError: If the file cannot be written.
    """
    os.makedirs(dest_folder, exist_ok=True)  # create folder if it does not exist

    file_path = os.path.join(dest_folder, filename)
    partial_path = file_path + ".part"

    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=timeout) as r:
        if not r.ok:  # HTTP status code 4XX/5XX
            print("Download failed: status code {}\n{}".format(r.status_code, r.text))
            return

        print("saving to", os.path.abspath(file_path))
        try:
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 8):
                    if chunk:
                        f.write(chunk)
                # Sync once at the end instead of after every 8 KiB chunk.
                f.flush()
                os.fsync(f.fileno())
            os.replace(partial_path, file_path)
        finally:
            # Still present only when the transfer failed part-way through.
            if os.path.exists(partial_path):
                os.remove(partial_path)
# Obtain working TCD credentials: try the ones saved by a previous run first,
# otherwise prompt until the past-papers index answers with HTTP 200.
save_location = "Past Papers/"
PASSWORD_FILE = "Trinity_password"
auth = ""  # "user:password", percent-encoded so it can be embedded in a URL
code = 0  # last HTTP status seen; 0 means no request has been made yet
prompted = False  # True once `auth` holds credentials typed in during this run
while code != 200:
    if os.path.isfile(PASSWORD_FILE) and code == 0:
        # First attempt only: reuse the saved credentials. Strip whitespace so
        # a trailing newline added by an editor does not corrupt the URL.
        with open(PASSWORD_FILE) as saved:
            auth = saved.read().strip()
    else:
        user = input("Enter your tcd username to continue: ")
        # getpass keeps the password from being echoed to the terminal.
        password = getpass("Enter your tcd password to continue: ")
        # Percent-encode both parts so characters such as "@", ":" or "/" do
        # not break the userinfo section of the URL.
        auth = quote(user, safe="") + ":" + quote(password, safe="")
        prompted = True
    # Only the status line is needed; close the streamed response right away.
    with requests.get(
        "https://" + auth + "@www.tcd.ie/academicregistry/exams/past-papers/annual/",
        stream=True,
    ) as probe:
        code = probe.status_code
    print("----------------------")
    if 300 <= code <= 308:
        print("Site moved or redirected contact me to update this tool")
    elif code == 401:
        print("Unauthorized make sure you entered the right user and password")
    elif code == 403:
        print("Your user does not have permission to access these files")
    elif code == 418:
        print("Server can not make coffee as it is a teapot")
    elif code == 429:
        print("Your IP has sent too many requests please wait")
    elif 400 <= code <= 451:
        print("Bad request")
    elif 500 <= code < 600:
        print("Server error. Check if https://www.tcd.ie/ is online or contact tcd support.")
    if code != 200:
        print("")
print("Successfully connected")
print("----------------------")
# Save whenever the working credentials were typed in this run. This also
# replaces a saved file whose credentials were rejected.
# NOTE(review): the password is stored in plain text next to the script.
if prompted:
    print("Saving password")
    with open(PASSWORD_FILE, "w") as out:
        out.write(auth)
    print("----------------------")
# Ask what to download: a module name, an explicit list of module codes
# (CODE), or every paper on the site (ALL).
modules = []  # module codes whose papers should be downloaded
year = datetime.now().year  # most recent year to look at
download_all = False
modulename = input("Input the name of the module you want to download (type CODE to input the module codes instead)\nType ALL to download all Trinity past papers:\n")
if modulename == "CODE":
    modulename = ""
    # Read codes until an empty line. The terminator itself is not stored, and
    # the list may be empty when the loop starts.
    while True:
        entered = input("Input the module codes you want to download (press enter to finish)").strip()
        if not entered:
            break
        modules.append(entered)
elif modulename == "ALL":
    modulename = ""
    download_all = True
# Walk backwards from the current year, read each year's index page and
# download every linked PDF that belongs to one of the requested modules.
# NOTE(review): link[5:] below assumes every PDF href starts with a
# five-character relative prefix such as "../.." - confirm against the site.
static_url = "https://" + auth + "@www.tcd.ie/academicregistry/exams"
# TODO Use old site before 2012 https://www.tcd.ie/Local/Exam_Papers/
for year in range(year, 2012, -1):
    yearcode = str(year - 1)[2:] + str(year)[2:]
    if year > 2022:
        index_url = "https://" + auth + "@www.tcd.ie/academicregistry/exams/past-papers/" + yearcode
    elif year == 2022:
        index_url = "https://" + auth + "@www.tcd.ie/academicregistry/exams/past-papers/annual-2021-22"
    else:
        index_url = "https://" + auth + "@www.tcd.ie/academicregistry/exams/past-papers/annual-" + yearcode
    print("--------------")
    print("Getting year", year)
    response = requests.get(index_url)
    print("Response:", response.status_code)
    if not response.ok:
        # An error page holds no papers; do not parse it as an index.
        continue
    soup = BeautifulSoup(response.content, "html.parser")

    # If a code found in an earlier year appears here, skip the name lookup.
    found = any(soup.find(string=known) is not None for known in modules)
    if found:
        print("Previous module found")

    if modulename != "" and not found:
        # Exact match first, then a case-insensitive substring match. The name
        # is escaped so characters such as "+" or "(" are matched literally.
        element = soup.find(string=modulename)
        if element is None:
            element = soup.find(string=re.compile(re.escape(modulename), re.IGNORECASE))
        if element is not None:
            # The code is taken from the next table cell, or from the next
            # link when that cell has no single string.
            modulecode = None
            for tag_name in ("td", "a"):
                neighbour = element.find_next(tag_name)
                if neighbour is not None and neighbour.string is not None:
                    modulecode = neighbour.string
                    break
            if modulecode is not None:
                print("Found code", modulecode, "for module name", element.string)
                modules.append(modulecode)

    for section in soup.find_all('a'):
        link = section.get('href')
        if link is None or not link.lower().endswith(".pdf"):
            continue
        if not (download_all or section.text in modules):
            continue
        # Papers go into a folder named after the module name when one was
        # given, otherwise after the link text.
        foldername = modulename if modulename != "" else section.text
        filename = link.split("/")[-1][:-4] + " " + str(year) + ".pdf"
        if os.path.isfile(os.path.join(save_location + foldername, filename)):
            print(filename, "already exists on disk.")
            continue
        try:
            download(static_url + link[5:], save_location + foldername, filename)
        except Exception as error:
            # Keep going with the remaining papers, but report the failure.
            # Only the exception type is printed, so the credentials embedded
            # in the URL cannot leak through the error message.
            print("Failed to download", filename, "-", type(error).__name__)
input("All available papers downloaded into ~/Past Papers")

Comments