about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- test/requests/links_scraper/genelinks.py 29
1 files changed, 9 insertions, 20 deletions
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 13aee7c8..52c13489 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -25,7 +25,6 @@ def search_templates():
                 parsed_page = soup(
                     open(file_path, encoding="utf8"), "html.parser")
                 html_parsed_pages.append(parsed_page)
-
     return html_parsed_pages
 
 
@@ -33,7 +32,7 @@ def is_valid_link(url_link):
     try:
         result = urlparse(url_link)
         return all([result.scheme, result.netloc, result.path])
-    except Exception as e:
+    except Exception:
         return False
 
 
@@ -41,13 +40,10 @@ def test_link(link):
     print(f'Checking -->{link}')
     results = None
     try:
-
         results = requests.get(link, verify=False, timeout=10)
         status_code = results.status_code
-
-    except Exception as e:
+    except Exception:
         status_code = 408
-
     return int(status_code) > 403
 
 
@@ -55,14 +51,11 @@ def fetch_css_links(parsed_page):
     print("fetching css links")
     for link in parsed_page.findAll("link"):
         full_path = None
-
         link_url = link.attrs.get("href")
         if is_valid_link(link_url):
             full_path = link_url
-
         elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url):
             full_path = urljoin('http://localhost:5004/', link_url)
-
         if full_path is not None:
             if test_link(full_path):
                 BROKEN_LINKS.add(full_path)
@@ -70,16 +63,13 @@ def fetch_css_links(parsed_page):
 
 def fetch_html_links(parsed_page):
     print("fetching a tags ")
-
     for link in parsed_page.findAll("a"):
         full_path = None
         link_url = link.attrs.get("href")
         if re.match(r"^/", link_url):
             full_path = urljoin('http://localhost:5004/', link_url)
-
         elif is_valid_link(link_url):
             full_path = link_url
-
         if full_path is not None:
             if test_link(full_path):
                 BROKEN_LINKS.add(full_path)
@@ -91,8 +81,11 @@ def fetch_script_tags(parsed_page):
         js_link = link.attrs.get("src")
         if js_link is not None:
             if is_valid_link(js_link):
-                raise SystemExit("Failed,the library should be packaged in guix.\
-                                Please contact,http://genenetwork.org/ for more details")
+                raise SystemExit("Failed,the library should be "
+                                 "packaged in guix. "
+                                 "Please contact, "
+                                 "http://genenetwork.org/ "
+                                 "for more details")
 
             elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link):
                 full_path = urljoin('http://localhost:5004/', js_link)
@@ -101,11 +94,9 @@ def fetch_script_tags(parsed_page):
 
 
 def fetch_page_links(page_url):
-
     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
     html_page = uReq(page_url)
     parsed_page = soup(html_page, "html.parser")
-
     fetch_script_tags(parsed_page=parsed_page)
     fetch_css_links(parsed_page=parsed_page)
     fetch_html_links(parsed_page=parsed_page)
@@ -113,13 +104,10 @@ def fetch_page_links(page_url):
 
 def webpages_to_check():
     pages = [f"http://localhost:{PORT}/"]
-
     return pages
 
 
 if __name__ == '__main__':
-    # results = search_templates()
-
     for page in webpages_to_check():
         fetch_page_links(page)
         if len(BROKEN_LINKS) > 0:
@@ -129,4 +117,5 @@ if __name__ == '__main__':
 
     if len(BROKEN_LINKS) > 0:
         raise SystemExit(
-            "The links Above are broken.Please contact genenetwork.org<<<<<<<<")
+            "The links Above are broken. "
+            "Please contact genenetwork.org<<<<<<<<")