Python script to check HTTP status and redirect chains

Here’s a quick and dirty Python script that goes through a list of URLs and, for each URL, checks the HTTP status code (200, 301, 404, etc.), along with the number of redirects and the redirect chain (each redirect destination). It saves the output to a text file (you can modify it to save to CSV, but I used the text import feature in Excel to import the tab-delimited data).

Well, damn hosted WordPress doesn’t let me paste code correctly, so here’s the link to Gist.

import requests
def get_status_code(url, timeout=30):
    """Fetch *url* and summarise its status and redirect chain as TSV fields.

    Args:
        url: The URL to request with ``requests.get``.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A tab-delimited string of four fields, each followed by a tab:
        status code, number of redirects, redirect chain, final URL.
        When redirects occurred, the status is that of the first response
        in the redirect history and the chain lists the URL of every
        response in that history, each followed by " | ".  Without
        redirects only the status field is filled in.  If the request
        fails, the status field is "0" and the rest are empty.
    """
    # Announce the URL before the request so that an error message printed
    # below can be matched to the URL that caused it.
    print("Processing " + url)
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # RequestException is the base class of ConnectionError, Timeout,
        # MissingSchema, TooManyRedirects, ... so a single bad URL cannot
        # abort the whole run.
        print("Error: failed to connect.")
        return '0\t\t\t\t'

    if not r.history:
        return str(r.status_code) + '\t\t\t\t'

    # Keep the trailing " | " after every hop: the output format is unchanged.
    chain = "".join(resp.url + " | " for resp in r.history)
    fields = [str(r.history[0].status_code), str(len(r.history)), chain, r.url]
    return '\t'.join(fields) + '\t'
input_file = 'urls.txt'
output_file = 'output.txt'

# Read the URL list up front inside a ``with`` block so the input handle is
# always closed, and so a missing input file is reported before the output
# file is truncated.  Blank lines are dropped and surrounding whitespace is
# stripped: neither is part of a URL.
with open(input_file, "r") as f:
    urls = [line.strip() for line in f.read().splitlines() if line.strip()]

with open(output_file, 'w') as o_file:
    o_file.write('URL\tStatus\tNumber of redirects\tRedirect Chain\tFinal URL\t\n')
    for url in urls:
        # get_status_code returns the remaining tab-delimited columns.
        o_file.write(url + "\t" + get_status_code(url) + "\t\n")
view raw gistfile1.txt hosted with ❤ by GitHub

Update: and here’s how to post Gist properly:

Screen Shot 2015-10-01 at 10.59.23 AM