about summary refs log tree commit diff
path: root/test/requests/link_checker.py
diff options
context:
space:
mode:
author Muriithi Frederick Muriuki 2018-03-31 14:45:17 +0300
committer Muriithi Frederick Muriuki 2018-03-31 14:45:17 +0300
commit 31747256e466456cc88aac60780836a6534f18cd (patch)
tree a7b371e4ec217ffb37594e15f5e603ae28511dc2 /test/requests/link_checker.py
parent cfce6d80be4fb38573c37d1943db2687d54cf2fc (diff)
parent 5424741a4a126bfd5f04df7cbcdc30b4e1376b86 (diff)
download genenetwork2-31747256e466456cc88aac60780836a6534f18cd.tar.gz
Merge branch 'testing' of https://github.com/genenetwork/genenetwork2 into testing
* Fix conflicts
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- test/requests/link_checker.py 24
1 files changed, 17 insertions, 7 deletions
diff --git a/test/requests/link_checker.py b/test/requests/link_checker.py
index a75327f0..715f330c 100644
--- a/test/requests/link_checker.py
+++ b/test/requests/link_checker.py
@@ -4,6 +4,8 @@ import requests
 from lxml.html import parse
 from requests.exceptions import ConnectionError
 
+DO_FAIL=False  # fail on error
+
 def is_root_link(link):
     pattern = re.compile("^/$")
     return pattern.match(link)
@@ -29,28 +31,36 @@ def get_links(doc):
               , doc.cssselect("a")))
 
 def verify_link(link):
+    if link[0] == "#":
+        # local link on page
+        return
+    print("verifying "+link)
     try:
-        result = requests.get(link, timeout=20)
+        result = requests.get(link, timeout=20, verify=False)
         if result.status_code == 200:
             print(link+" ==> OK")
+        elif result.status_code == 307:
+            print(link+" ==> REDIRECT")
         else:
             print("ERROR: link `"+link+"` failed with status "
                   , result.status_code)
-    except Exception as ex:
-        print("ERROR: ("+link+")", ex)
+
+            if DO_FAIL:
+                raise Exception("Failed verify")
+    except ConnectionError as ex:
+        print("ERROR: ", link, ex)
+        if DO_FAIL:
+            raise ex
 
 def check_page(host, start_url):
     print("")
-    print("Checking links in page `"+start_url+"`")
+    print("Checking links host "+host+" in page `"+start_url+"`")
     doc = parse(start_url).getroot()
     links = get_links(doc)
     in_page_links = filter(is_in_page_link, links)
     internal_links = filter(is_internal_link, links)
     external_links = filter(lambda x: not (is_internal_link(x) or is_in_page_link(x)), links)
 
-    for link in in_page_links:
-        verify_link(start_url+link)
-
     for link in internal_links:
         verify_link(host+link)