about | summary | refs | log | tree | commit | diff
path: root/test/requests/link_checker.py
diff options
context:
space:
mode:
author zsloan 2018-04-13 15:47:42 +0000
committer zsloan 2018-04-13 15:47:42 +0000
commit eb24f53d7f5210ead3748772bb4126f78520f32c (patch)
tree e58268dc13fb494818095021bf5e8510da6f7684 /test/requests/link_checker.py
parent 9276e5eee9be7ed37fda5ea88aec2f1a238864ad (diff)
parent 270f86c41f7c90cc4ca51bca0aec789a09a36a0e (diff)
download genenetwork2-eb24f53d7f5210ead3748772bb4126f78520f32c.tar.gz
Resolved conflicts for pulling from testing branch
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- test/requests/link_checker.py 75
1 files changed, 75 insertions, 0 deletions
diff --git a/test/requests/link_checker.py b/test/requests/link_checker.py
new file mode 100644
index 00000000..64553ed8
--- /dev/null
+++ b/test/requests/link_checker.py
@@ -0,0 +1,75 @@
+from __future__ import print_function
+import re
+import requests
+from lxml.html import parse
+from requests.exceptions import ConnectionError
+
+# Switch read by verify_link: when true, a failed link raises an exception
+# after the error has been printed; when false, the error is only printed.
+DO_FAIL=False # fail on error
+
+def is_root_link(link):
+    """Return a match object when `link` is the root path `/`, else None."""
+    return re.match("^/$", link)
+
+def is_mailto_link(link):
+    """Return a match object when `link` starts with `mailto:`, else None."""
+    mailto_re = re.compile("^mailto:.*")
+    found = mailto_re.match(link)
+    return found
+
+def is_internal_link(link):
+    """Return a match object when `link` begins with `/`, else None.
+
+    check_page treats such links as paths on the host under test and
+    prefixes them with the host before verifying them.
+    """
+    return re.compile("^/.*").match(link)
+
+def get_links(doc):
+    """Collect the href targets of every `<a>` element in `doc`.
+
+    `doc` must offer a `cssselect` method whose results expose `get`
+    (check_page passes the root element produced by lxml's `parse`).
+
+    Anchors without an href attribute, links to the site root (`/`) and
+    `mailto:` links are left out.
+
+    Returns a list rather than a lazy `filter` object, so that callers can
+    walk the result more than once on Python 3 as well as Python 2.
+    """
+    hrefs = (anchor.get("href") for anchor in doc.cssselect("a"))
+    return [
+        href for href in hrefs
+        # `get` yields None for anchors that carry no href (e.g. named
+        # anchors); the regex helpers would raise TypeError on None.
+        if href is not None
+        and not (is_root_link(href) or is_mailto_link(href))
+    ]
+
+def verify_link(link):
+    """Request `link` and print whether it could be retrieved.
+
+    Links starting with `#` point into the current page and are skipped.
+    A 200 response is reported as OK and a 307 response as REDIRECT; any
+    other status is reported as an error. A request that fails outright
+    (connection error, timeout, invalid or unsupported URL) is reported as
+    an error as well.
+
+    Raises:
+        Exception: when DO_FAIL is true and the response status is neither
+            200 nor 307.
+        requests.exceptions.RequestException: re-raised when DO_FAIL is
+            true and the request itself failed.
+    """
+    # startswith, unlike link[0], also copes with an empty href.
+    if link.startswith("#"):
+        # local link on page
+        return
+    print("verifying "+link)
+    try:
+        # NOTE(review): verify=False turns off TLS certificate validation;
+        # confirm this is only ever pointed at test deployments.
+        result = requests.get(link, timeout=20, verify=False)
+    except requests.exceptions.RequestException as ex:
+        # RequestException is the base class of ConnectionError, Timeout,
+        # MissingSchema, InvalidSchema, ...; catching it keeps one bad link
+        # (read timeout, `javascript:` href, ...) from aborting the whole
+        # run when DO_FAIL is off.
+        print("ERROR: ", link, ex)
+        if DO_FAIL:
+            raise
+        return
+    if result.status_code == 200:
+        print(link+" ==> OK")
+    elif result.status_code == 307:
+        print(link+" ==> REDIRECT")
+    else:
+        print("ERROR: link `"+link+"` failed with status "
+              , result.status_code)
+        if DO_FAIL:
+            raise Exception("Failed verify")
+
+def check_page(host, start_url):
+    """Verify every link found on the page at `start_url`.
+
+    The page is loaded with lxml's `parse`, its links are collected with
+    get_links, and each one is passed to verify_link. Links beginning
+    with `/` are prefixed with `host` first; all other links are verified
+    exactly as written in the page.
+
+    Args:
+        host: base URL that is prepended to links beginning with `/`.
+        start_url: URL of the page whose links are checked.
+    """
+    print("")
+    print("Checking links host "+host+" in page `"+start_url+"`")
+    doc = parse(start_url).getroot()
+    # Materialise the links once: on Python 3 get_links/filter may hand
+    # back a one-shot iterator, and partitioning it with two lazy filters
+    # would leave the external list empty after the internal one is walked.
+    links = list(get_links(doc))
+    internal_links = [link for link in links if is_internal_link(link)]
+    external_links = [link for link in links if not is_internal_link(link)]
+    # Debugging aid kept from the original:
+    # external_links.append("http://somenon-existentsite.brr")
+    for link in internal_links:
+        verify_link(host+link)
+
+    for link in external_links:
+        verify_link(link)
+
+def check_links(args_obj, parser):
+    """Check the links on the home page and on one traits page.
+
+    The host is read from `args_obj.host`. `parser` is accepted but not
+    used inside this function.
+    """
+    print("")
+    print("Checking links")
+    base = args_obj.host
+
+    # Page paths relative to the host: the home page first, then the
+    # traits page.
+    page_paths = (
+        "",
+        "/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P",
+    )
+    for path in page_paths:
+        check_page(base, base+path)