#!/usr/bin/env bash

# [start-readme]
# The script is run once per day via a scheduled GitHub Action to check all links in the site. It automatically opens an issue if it finds broken links.
# To exclude a URL from the link check, add it to `lib/excluded-links.js`.
#
# For checking internal links, see `script/check-internal-links`.
# [end-readme]

# Extra flag forwarded to script/get-blc-command.js: empty to check all links,
# " --internalOnly" when -i is given.
internal=""

# Print the command-line help text.
usage() {
  echo "Usage:"
  echo " script/check-external-links [OPTIONS] [two-letter language code]"
  echo ""
  echo " script/check-external-links -i Check internal links. Without this flag, check all links."
  echo " script/check-external-links -h Display this help message."
}

# The leading ":" puts getopts in silent mode so that unknown options are
# reported here as errors instead of being treated as a successful help request.
while getopts ":hi" opt; do
  case "${opt}" in
    h)
      usage
      exit 0
      ;;
    i)
      internal=" --internalOnly"
      ;;
    \?)
      # A literal "-?" previously printed the help and exited 0; keep that.
      if [ "${OPTARG}" = "?" ]; then
        usage
        exit 0
      fi
      echo "error: unknown option -${OPTARG}" >&2
      usage >&2
      exit 1
      ;;
  esac
done
# Drop the parsed options so that ${1} is the first positional argument.
shift $((OPTIND - 1))

# The language code is mandatory.
if [ -z "${1}" ]; then
  echo "error: must provide two-letter language code" >&2
  exit 1
fi

languageCode=${1}

# Pass options to script to construct blc command.
# ${internal} is intentionally unquoted: it is either empty (and must vanish)
# or " --internalOnly" (and must become a separate argument).
# Exit if the script returned an error -- any non-zero status, not only 1.
# shellcheck disable=SC2086
if ! blcCommand="$(./script/get-blc-command.js ${internal} --language "${languageCode}")"; then
  exit 1
fi

# Determine logfile name based on options:
# "blc_output_internal.log" when ${internal} is non-empty (-i was given),
# "blc_output.log" otherwise.
logfile="blc_output${internal:+_internal}.log"

# Kill any server running in the background, then start the server
killall node >/dev/null 2>&1
node server.js >/dev/null &
# Remember the PID so the server can be stopped if the check below fails.
serverPid=$!
# Give the server a moment to start before probing it.
sleep 5

host="http://localhost:4000"

# Check whether localhost is accessible: the first line of the HEAD response
# must be an HTTP status line carrying a 2xx or 3xx status code.
hostStatus=$(curl -I --silent "${host}" | head -1)
if [[ ! "${hostStatus}" =~ ^HTTP/[0-9.]+[[:space:]]+[23][0-9][0-9] ]]; then
  echo "Can't connect to ${host}!"
  echo "${hostStatus}"
  # Do not leave the server running in the background after giving up.
  kill -INT "${serverPid}" >/dev/null 2>&1
  exit 1
fi

# Execute blc and save output.
# blcCommand is a flat string holding the whole command line, so it is left
# unquoted on purpose to be split into the command and its arguments.
# shellcheck disable=SC2086
${blcCommand} > "${logfile}"

# We're done with the server now, so end the process
# killall node will also terminate this script, so find and kill the specific pid
pid=$(pgrep -f "node server.js")
if [ -n "${pid}" ]; then
  # pgrep may print several PIDs (one per line); unquoted so each is its own argument.
  # shellcheck disable=SC2086
  kill -INT ${pid} >/dev/null 2>&1
fi

# Recheck "403 Forbidden" results due to a bug
# https://github.com/stevenvachon/broken-link-checker/issues/58
# Also recheck "429" GitHub results

# Succeed when $1 is an HTTP status line ("HTTP/1.1 200 OK") whose code is
# 1xx, 2xx or 3xx. Fails for 4xx/5xx and for anything that is not a status
# line, so a request that produced no response is not mistaken for a live link.
isLinkAlive() {
  [[ "$1" =~ ^HTTP/[0-9.]+[[:space:]]+[123][0-9][0-9] ]]
}

# Blank out every line of file $2 that contains URL $1 as a complete token,
# i.e. followed by a space, a tab or the end of the line.
# The URL is compared as a literal string, never as a regular expression, so
# metacharacters in it are harmless and a URL that is merely a prefix of a
# longer one does not wipe the longer one's line.
removeUrlFromLog() {
  local url=$1 file=$2 tmp
  tmp=$(mktemp) || return 1
  if RECHECK_URL="${url}" awk '
    BEGIN { u = ENVIRON["RECHECK_URL"]; n = length(u) }
    {
      rest = $0
      hit = 0
      while (n > 0 && (i = index(rest, u)) > 0) {
        after = substr(rest, i + n, 1)
        if (after == "" || after == " " || after == "\t") { hit = 1; break }
        rest = substr(rest, i + 1)
      }
      if (hit) print ""; else print
    }
  ' "${file}" > "${tmp}"; then
    # Write back in place so the log keeps its path and permissions.
    cat "${tmp}" > "${file}"
  fi
  rm -f -- "${tmp}"
}

urlsToRecheck=$(grep -E "HTTP_4(03|29)" "${logfile}" | grep -o "http.* ")

if [ -n "${urlsToRecheck}" ]; then
  # One whitespace-separated word per iteration, without glob expansion.
  while IFS= read -r url; do
    # grep -o can capture words that follow the URL; only recheck real URLs.
    case "${url}" in
      http*) ;;
      *) continue ;;
    esac
    # If the recheck answers with a non-error status, the link is NOT really
    # broken, so remove it from the list.
    if isLinkAlive "$(curl -I --silent "${url}" </dev/null | head -1)"; then
      removeUrlFromLog "${url}" "${logfile}"
    fi
  done <<< "$(tr -s '[:space:]' '\n' <<< "${urlsToRecheck}")"
fi

# Print the "BROKEN" lines of log file $1.
# Ignore "308 Permanent Redirect" results, which are not actually broken
listBrokenLinks() {
  grep "BROKEN" "$1" | grep -v HTTP_308
}

# Count number of broken links in output.
# The log is scanned once and the count is derived from the captured list,
# so the number reported always matches the lines printed.
brokenLinks=$(listBrokenLinks "${logfile}")
numberOfBrokenLinks=$(printf '%s' "${brokenLinks}" | grep -c "")

# Report the result. When broken links are found, an internal-only run (-i)
# exits with status 1 so the check run fails; a run over all links exits 0.
if [ "${numberOfBrokenLinks}" -gt 0 ]; then
  # Print "links" or "link" in message depending on the number found
  if [ "${numberOfBrokenLinks}" -gt 1 ]; then
    linkOrLinks="links"
  else
    linkOrLinks="link"
  fi
  echo -e "\n${numberOfBrokenLinks} broken ${linkOrLinks} found on help.github.com\n"
  echo -e "Note: links that start with 'http://localhost:4000/' are internal links.\n"

  # List broken links
  echo "${brokenLinks}"

  # Update final number of broken links: reprint the last two lines of the log
  # with the recounted total substituted for the "N broken." figure.
  echo -e "\n$(tail -2 "${logfile}")" | sed "s|. [0-9]* broken.|. ${numberOfBrokenLinks} broken.|"

  # Exit without failure when checking all links so script/open-broken-links-issue can run
  if [ -z "${internal}" ]; then
    exit 0
  else
    exit 1
  fi
else
  echo "All links are good!"
  echo -e "\n$(tail -2 "${logfile}")"
  exit 0
fi