about | summary | refs | log | tree | commit | diff
path: root/test/requests/link_checker.py
diff options
context:
space:
mode:
author zsloan 2018-04-02 15:55:50 +0000
committer zsloan 2018-04-02 15:55:50 +0000
commit 7bd041f66cc4f2548a132e4d69d6533b81f005fc (patch)
tree ee77d99cb35c85e7c27da7c55543631b44f1ab22 /test/requests/link_checker.py
parent b17b78a46aec1ca515bfef7168232020cf09a151 (diff)
parent 92e89794fa57ef1570631403a8ceebf52a2cbc08 (diff)
download genenetwork2-7bd041f66cc4f2548a132e4d69d6533b81f005fc.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into testing
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- test/requests/link_checker.py 18
1 files changed, 15 insertions, 3 deletions
diff --git a/test/requests/link_checker.py b/test/requests/link_checker.py
index 256bf6ef..64553ed8 100644
--- a/test/requests/link_checker.py
+++ b/test/requests/link_checker.py
@@ -4,6 +4,8 @@ import requests
from lxml.html import parse
from requests.exceptions import ConnectionError
+DO_FAIL=False # fail on error
+
def is_root_link(link):
pattern = re.compile("^/$")
return pattern.match(link)
@@ -25,24 +27,34 @@ def get_links(doc):
, doc.cssselect("a")))
def verify_link(link):
+ if link[0] == "#":
+ # local link on page
+ return
+ print("verifying "+link)
try:
- result = requests.get(link, timeout=20)
+ result = requests.get(link, timeout=20, verify=False)
if result.status_code == 200:
print(link+" ==> OK")
+ elif result.status_code == 307:
+ print(link+" ==> REDIRECT")
else:
print("ERROR: link `"+link+"` failed with status "
, result.status_code)
+ if DO_FAIL:
+ raise Exception("Failed verify")
except ConnectionError as ex:
print("ERROR: ", link, ex)
+ if DO_FAIL:
+ raise ex
def check_page(host, start_url):
print("")
- print("Checking links in page `"+start_url+"`")
+ print("Checking links host "+host+" in page `"+start_url+"`")
doc = parse(start_url).getroot()
links = get_links(doc)
internal_links = filter(is_internal_link, links)
external_links = filter(lambda x: not is_internal_link(x), links)
- external_links.append("http://somenon-existentsite.brr")
+ # external_links.append("http://somenon-existentsite.brr")
for link in internal_links:
verify_link(host+link)