diff options
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- | test/requests/link_checker.py | 127 |
1 files changed, 127 insertions, 0 deletions
"""Link and static-file checks for a running web host.

Source path: ``test/requests/link_checker.py``.

The entry points are :func:`check_links` and :func:`check_packaged_js_files`;
both take an object exposing a ``host`` attribute and report their findings
on standard output.
"""
from __future__ import print_function

import re

import requests
from lxml.html import parse
from requests.exceptions import ConnectionError

DO_FAIL = False  # fail on error

# Compiled once at import time rather than on every predicate call.
_ROOT_LINK = re.compile(r"^/$")
_MAILTO_LINK = re.compile(r"^mailto:.*")
_INTERNAL_LINK = re.compile(r"^/.*")
_IN_PAGE_LINK = re.compile(r"^#.*")

# ``Response.content`` is bytes, so the marker must be bytes as well.
_NOT_FOUND_MARKER = b"Error: 404 Not Found"


def is_root_link(link):
    """Return a match object if *link* is the site root ``/``, else ``None``."""
    return _ROOT_LINK.match(link)


def is_mailto_link(link):
    """Return a match object if *link* starts with ``mailto:``, else ``None``."""
    return _MAILTO_LINK.match(link)


def is_internal_link(link):
    """Return a match object if *link* starts with ``/``, else ``None``."""
    return _INTERNAL_LINK.match(link)


def is_in_page_link(link):
    """Return a match object if *link* starts with ``#``, else ``None``."""
    return _IN_PAGE_LINK.match(link)


def get_links(doc):
    """Collect the ``href`` values of every ``<a>`` element in *doc*.

    Anchors with a missing or empty ``href`` are skipped, as are the root
    link (``/``) and ``mailto:`` links.

    Returns a list (not a lazy iterator) so that callers can iterate over
    the result more than once.
    """
    hrefs = (anchor.get("href") for anchor in doc.cssselect("a"))
    return [
        href for href in hrefs
        if href and not (is_root_link(href) or is_mailto_link(href))
    ]


def verify_link(link):
    """Fetch *link* and print whether it answered with 200, 307 or an error.

    In-page links (starting with ``#``) are ignored.  Connection errors and
    unexpected status codes are printed; they are only raised when the
    module-level ``DO_FAIL`` flag is true.
    """
    if link.startswith("#"):
        # local link on page
        return
    print("verifying " + link)
    try:
        # NOTE(review): verify=False turns off TLS certificate verification;
        # confirm this is only ever pointed at trusted test hosts.
        result = requests.get(link, timeout=20, verify=False)
    except ConnectionError as ex:
        print("ERROR: ", link, ex)
        if DO_FAIL:
            raise
        return

    if result.status_code == 200:
        print(link + " ==> OK")
    elif result.status_code == 307:
        print(link + " ==> REDIRECT")
    else:
        print("ERROR: link `" + link + "` failed with status ",
              result.status_code)
        if DO_FAIL:
            raise Exception("Failed verify")


def verify_static_file(link):
    """Fetch the static file at *link* and raise if it is not served.

    The file counts as served when the response status is 200 and the body
    does not contain the text ``Error: 404 Not Found``.

    Raises:
        Exception: if the status or body check fails.  Connection errors
            are printed and not raised.
    """
    print("verifying " + link)
    try:
        result = requests.get(link, timeout=20, verify=False)
    except ConnectionError as ex:
        print("ERROR: ", link, ex)
        return

    # A membership test also rejects a body that *starts* with the marker,
    # which a ``find(...) <= 0`` comparison would let through.
    if result.status_code == 200 and _NOT_FOUND_MARKER not in result.content:
        print(link + " ==> OK")
    else:
        print("ERROR: link {}".format(link))
        raise Exception("Failed verify")


def check_page(host, start_url):
    """Verify every internal and external link found on *start_url*.

    Internal links (starting with ``/``) are prefixed with *host* before
    being requested; in-page links (starting with ``#``) are not requested.
    """
    print("")
    print("Checking links host " + host + " in page `" + start_url + "`")
    doc = parse(start_url).getroot()
    # Materialise once: the links are partitioned twice below.
    links = list(get_links(doc))
    internal_links = [link for link in links if is_internal_link(link)]
    external_links = [
        link for link in links
        if not (is_internal_link(link) or is_in_page_link(link))
    ]

    for link in internal_links:
        verify_link(host + link)

    for link in external_links:
        verify_link(link)


def check_links(args_obj, parser):
    """Check the links on the home page and on one trait page.

    Args:
        args_obj: object whose ``host`` attribute is the base URL to check.
        parser: not used by this function.
    """
    print("")
    print("Checking links")
    host = args_obj.host

    # Check the home page
    check_page(host, host)

    # Check traits page
    check_page(
        host,
        host + "/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P")


def check_packaged_js_files(args_obj, parser):
    """Verify that each packaged JS/CSS file is served by the host.

    Args:
        args_obj: object whose ``host`` attribute is the base URL to check.
        parser: not used by this function.

    Raises:
        Exception: propagated from :func:`verify_static_file` for the first
            file that fails its check.
    """
    host = args_obj.host
    js_files = [
        # Datatables Extensions:
        "/css/DataTablesExtensions/buttonsBootstrap/css/buttons.bootstrap.css",
        "/js/DataTablesExtensions/buttons/js/dataTables.buttons.min.js",
        "/css/DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css",
        "/js/DataTablesExtensions/colResize/dataTables.colResize.js",
        "/js/DataTablesExtensions/colReorder/js/dataTables.colReorder.js",
        "/js/DataTablesExtensions/buttons/js/buttons.colVis.min.js",
        "/js/DataTables/js/jquery.dataTables.js",
        "/css/DataTablesExtensions/scroller/css/scroller.dataTables.min.css",
        # Datatables plugins:
        "/js/DataTablesExtensions/plugins/sorting/natural.js",
        "/js/DataTablesExtensions/plugins/sorting/scientific.js",
        # Other js libraries
        "/js/chroma/chroma.min.js",
        "/js/d3-tip/d3-tip.js",
        "/js/d3js/d3.min.js",
        "/js/js_alt/underscore.min.js",
        "/js/nvd3/nv.d3.min.css",
        "/js/qtip2/jquery.qtip.min.js",
        "/js/js_alt/md5.min.js",
    ]

    print("Checking links")
    for link in js_files:
        verify_static_file(host + link)