May 2017
Beginner
552 pages
28h 47m
English
This script uses lynx and curl to find broken links on a web page:
#!/bin/bash
#Filename: find_broken.sh
#Desc: Find broken links in a website
#
# Usage: find_broken.sh URL
#
# Crawls the given URL with `lynx -traversal`, which writes every link it
# encounters to reject.dat in the current directory.  Each unique link is
# then probed with a curl HEAD request: links answering 200/OK are silent,
# 301 redirects are reported as MOVED, everything else as BROKEN.

if [ $# -ne 1 ]; then
  # Original printed "$Usage: ..." — an undefined variable expansion;
  # the literal word "Usage" was intended.
  echo -e "Usage: $0 URL\n"
  exit 1
fi

echo "Broken links:"

# Unique, auto-cleaned working dir instead of the predictable /tmp/$$.lynx
# that was never removed.  lynx drops reject.dat into the cwd.
tmpdir=$(mktemp -d) || exit 1
trap 'rm -rf -- "$tmpdir"' EXIT
cd "$tmpdir" || exit 1

lynx -traversal "$1" > /dev/null

count=0
sort -u reject.dat > links.txt

while IFS= read -r link; do
  # One HEAD request per link (the original issued two); inspect the
  # status line from the captured headers.
  headers=$(curl -I -s -- "$link")
  if grep -q -e "HTTP/.*OK" -e "HTTP/.*200" <<<"$headers"; then
    continue
  fi
  if grep -q "HTTP/.*301" <<<"$headers"; then
    echo "MOVED: $link"
  else
    echo "BROKEN: $link"
    count=$((count + 1))
  fi
done < links.txt

[ "$count" -eq 0 ] && echo "No broken links found."