about summary refs log tree commit diff
path: root/test/requests/link_checker.py
diff options
context:
space:
mode:
author BonfaceKilz 2020-09-28 18:13:19 +0300
committer BonfaceKilz 2020-09-28 18:13:19 +0300
commit da6098574f8b410386e84f07fd0e8d0eed39e40d (patch)
tree 5da9a54c80ea4525aa2fb08f9dc3012c99626ed9 /test/requests/link_checker.py
parent d34258bed3ef13350499414100401df3bf08a105 (diff)
parent 367de7d8bd822a80cdc035a219b814f0b268b65f (diff)
download genenetwork2-da6098574f8b410386e84f07fd0e8d0eed39e40d.tar.gz
Merge branch 'build/python3-migration' of github.com:BonfaceKilz/genenetwork2 into build/python3-migration
Diffstat (limited to 'test/requests/link_checker.py')
-rw-r--r-- test/requests/link_checker.py 16
1 files changed, 6 insertions, 10 deletions
diff --git a/test/requests/link_checker.py b/test/requests/link_checker.py
index d040ba54..6ac26ba7 100644
--- a/test/requests/link_checker.py
+++ b/test/requests/link_checker.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 import re
 import requests
 from lxml.html import parse
@@ -23,12 +22,9 @@ def is_in_page_link(link):
     return pattern.match(link)
 
 def get_links(doc):
-    return filter(
-        lambda x: not (
+    return [x for x in [y.get("href") for y in doc.cssselect("a")] if not (
             is_root_link(x)
-            or is_mailto_link(x))
-        , map(lambda y: y.get("href")
-              , doc.cssselect("a")))
+            or is_mailto_link(x))]
 
 def verify_link(link):
     if link[0] == "#":
@@ -58,7 +54,7 @@ def verify_static_file(link):
     try:
         result = requests.get(link, timeout=20, verify=False)
         if (result.status_code == 200 and
-                result.content.find("Error: 404 Not Found") <= 0):
+                result.content.find(bytes("Error: 404 Not Found", "utf-8")) <= 0):
             print(link+" ==> OK")
         else:
             print("ERROR: link {}".format(link))
@@ -72,9 +68,9 @@ def check_page(host, start_url):
     print("Checking links host "+host+" in page `"+start_url+"`")
     doc = parse(start_url).getroot()
     links = get_links(doc)
-    in_page_links = filter(is_in_page_link, links)
-    internal_links = filter(is_internal_link, links)
-    external_links = filter(lambda x: not (is_internal_link(x) or is_in_page_link(x)), links)
+    in_page_links = list(filter(is_in_page_link, links))
+    internal_links = list(filter(is_internal_link, links))
+    external_links = [x for x in links if not (is_internal_link(x) or is_in_page_link(x))]
 
     for link in internal_links:
         verify_link(host+link)