diff options
author | Pjotr Prins | 2018-03-19 10:52:28 +0100 |
---|---|---|
committer | GitHub | 2018-03-19 10:52:28 +0100 |
commit | b359cb0712e9ef923d35524f310d841581d48f30 (patch) | |
tree | 3c4cafc0bc0b481490674c807f279b4371130d0c /test/requests/link_checker.py | |
parent | 2602be69f2869de376d1b9ced6131d880e9476c2 (diff) | |
parent | 5ccf077f53c6546bb9258c7116b4b1cf8903375f (diff) | |
download | genenetwork2-b359cb0712e9ef923d35524f310d841581d48f30.tar.gz |
Merge pull request #5 from fredmanglis/testing
Testing
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- | test/requests/link_checker.py | 63 |
1 files changed, 63 insertions, 0 deletions
"""Check that the links found on a web page can be fetched.

Contents of ``test/requests/link_checker.py``: a page is downloaded and
parsed, every ``<a href=...>`` target is collected, and each target is
requested once with the outcome printed to stdout.
"""
from __future__ import print_function

import re

import requests
from lxml.html import parse
from requests.exceptions import ConnectionError  # noqa: F401 - retained from the original import list
from requests.exceptions import RequestException

# Compiled once at import time instead of on every predicate call.
_ROOT_LINK = re.compile(r"^/$")
_MAILTO_LINK = re.compile(r"^mailto:.*")
_INTERNAL_LINK = re.compile(r"^/.*")


def is_root_link(link):
    """Return a match object if *link* is the root path ``/``, else ``None``."""
    return _ROOT_LINK.match(link)


def is_mailto_link(link):
    """Return a match object if *link* starts with ``mailto:``, else ``None``."""
    return _MAILTO_LINK.match(link)


def is_internal_link(link):
    """Return a match object if *link* starts with ``/``, else ``None``."""
    return _INTERNAL_LINK.match(link)


def get_links(doc):
    """Collect the ``href`` targets of all ``<a>`` elements in *doc*.

    Anchors without an ``href`` attribute, links to the root path and
    ``mailto:`` links are left out.

    :param doc: parsed HTML element supporting ``cssselect``.
    :returns: a list of href strings, in document order.
    """
    hrefs = (anchor.get("href") for anchor in doc.cssselect("a"))
    return [
        href for href in hrefs
        # ``get`` yields None for <a> without href (e.g. named anchors);
        # passing None to a regex match would raise TypeError.
        if href is not None
        and not (is_root_link(href) or is_mailto_link(href))
    ]


def verify_link(link):
    """Request *link* and print whether it answered with HTTP 200.

    Any failure raised by ``requests`` (connection error, timeout,
    malformed or unsupported URL, ...) is printed as an error instead of
    propagating, so one bad link does not stop the remaining checks.

    :param link: absolute URL to fetch.
    """
    try:
        result = requests.get(link, timeout=20)
    except RequestException as ex:
        # RequestException is the common base of ConnectionError, Timeout,
        # MissingSchema and the other errors requests.get can raise.
        print("ERROR: ", link, ex)
        return

    if result.status_code == 200:
        print(link+" ==> OK")
    else:
        print("ERROR: link `"+link+"` failed with status "
              , result.status_code)


def check_page(host, start_url):
    """Verify every link found on the page at *start_url*.

    Links starting with ``/`` are treated as internal and are prefixed
    with *host* before being requested; all other links are requested
    as they appear in the page.

    :param host: base URL prepended to internal links.
    :param start_url: URL of the page whose links are checked.
    """
    print("")
    print("Checking links in page `"+start_url+"`")
    doc = parse(start_url).getroot()
    links = get_links(doc)
    # Real lists (not ``filter`` objects) so that ``links`` can be walked
    # twice and ``append`` works on Python 3 as well as Python 2.
    internal_links = [link for link in links if is_internal_link(link)]
    external_links = [link for link in links if not is_internal_link(link)]
    # NOTE(review): this unresolvable host looks like a deliberate probe of
    # the error-reporting path - confirm before removing.
    external_links.append("http://somenon-existentsite.brr")
    for link in internal_links:
        verify_link(host+link)

    for link in external_links:
        verify_link(link)


def check_links(args_obj, parser):
    """Run the link checks against the host given in *args_obj*.

    :param args_obj: object with a ``host`` attribute (base URL of the site).
    :param parser: not used by this function.
    """
    print("")
    print("Checking links")
    host = args_obj.host

    # Check the home page
    check_page(host, host)

    # Check traits page
    check_page(
        host,
        host+"/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P")