diff options
author | zsloan | 2018-04-02 15:55:50 +0000 |
---|---|---|
committer | zsloan | 2018-04-02 15:55:50 +0000 |
commit | 7bd041f66cc4f2548a132e4d69d6533b81f005fc (patch) | |
tree | ee77d99cb35c85e7c27da7c55543631b44f1ab22 /test/requests/link_checker.py | |
parent | b17b78a46aec1ca515bfef7168232020cf09a151 (diff) | |
parent | 92e89794fa57ef1570631403a8ceebf52a2cbc08 (diff) | |
download | genenetwork2-7bd041f66cc4f2548a132e4d69d6533b81f005fc.tar.gz |
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into testing
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- | test/requests/link_checker.py | 18 |
1 files changed, 15 insertions, 3 deletions
```diff
diff --git a/test/requests/link_checker.py b/test/requests/link_checker.py
index 256bf6ef..64553ed8 100644
--- a/test/requests/link_checker.py
+++ b/test/requests/link_checker.py
@@ -4,6 +4,8 @@ import requests
 from lxml.html import parse
 from requests.exceptions import ConnectionError
 
+DO_FAIL=False # fail on error
+
 def is_root_link(link):
     pattern = re.compile("^/$")
     return pattern.match(link)
@@ -25,24 +27,34 @@ def get_links(doc):
               , doc.cssselect("a")))
 
 def verify_link(link):
+    if link[0] == "#":
+        # local link on page
+        return
+    print("verifying "+link)
     try:
-        result = requests.get(link, timeout=20)
+        result = requests.get(link, timeout=20, verify=False)
         if result.status_code == 200:
             print(link+" ==> OK")
+        elif result.status_code == 307:
+            print(link+" ==> REDIRECT")
         else:
             print("ERROR: link `"+link+"` failed with status "
                   , result.status_code)
+            if DO_FAIL:
+                raise Exception("Failed verify")
     except ConnectionError as ex:
         print("ERROR: ", link, ex)
+        if DO_FAIL:
+            raise ex
 
 def check_page(host, start_url):
     print("")
-    print("Checking links in page `"+start_url+"`")
+    print("Checking links host "+host+" in page `"+start_url+"`")
     doc = parse(start_url).getroot()
     links = get_links(doc)
     internal_links = filter(is_internal_link, links)
     external_links = filter(lambda x: not is_internal_link(x), links)
-    external_links.append("http://somenon-existentsite.brr")
+    # external_links.append("http://somenon-existentsite.brr")
     for link in internal_links:
         verify_link(host+link)
 
```