about | summary | refs | log | tree | commit | diff
path: root/test/requests
diff options
context:
space:
mode:
author: zsloan, 2021-05-12 18:15:49 +0000
committer: zsloan, 2021-05-12 18:15:49 +0000
commit: 82c139048e1f7f34b29d2c040866e1bc5ea02134 (patch)
tree: b0b61fe2503d737c3e611fef93659b9194297f42 /test/requests
parent: 1efc014dd5efc34d95b536f51f8a1de914069e20 (diff)
parent: c4ec08c1b544c05cce1c1247b8bc1afb741643e4 (diff)
download: genenetwork2-82c139048e1f7f34b29d2c040866e1bc5ea02134.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into testing
Diffstat (limited to 'test/requests')
-rw-r--r-- test/requests/links_scraper/genelinks.py 30
1 files changed, 9 insertions, 21 deletions
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 12300f4a..52c13489 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -2,7 +2,6 @@ import re
import requests
import urllib3
import os
-import logging
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
@@ -26,7 +25,6 @@ def search_templates():
parsed_page = soup(
open(file_path, encoding="utf8"), "html.parser")
html_parsed_pages.append(parsed_page)
-
return html_parsed_pages
@@ -34,7 +32,7 @@ def is_valid_link(url_link):
try:
result = urlparse(url_link)
return all([result.scheme, result.netloc, result.path])
- except Exception as e:
+ except Exception:
return False
@@ -42,13 +40,10 @@ def test_link(link):
print(f'Checking -->{link}')
results = None
try:
-
results = requests.get(link, verify=False, timeout=10)
status_code = results.status_code
-
- except Exception as e:
+ except Exception:
status_code = 408
-
return int(status_code) > 403
@@ -56,14 +51,11 @@ def fetch_css_links(parsed_page):
print("fetching css links")
for link in parsed_page.findAll("link"):
full_path = None
-
link_url = link.attrs.get("href")
if is_valid_link(link_url):
full_path = link_url
-
elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url):
full_path = urljoin('http://localhost:5004/', link_url)
-
if full_path is not None:
if test_link(full_path):
BROKEN_LINKS.add(full_path)
@@ -71,16 +63,13 @@ def fetch_css_links(parsed_page):
def fetch_html_links(parsed_page):
print("fetching a tags ")
-
for link in parsed_page.findAll("a"):
full_path = None
link_url = link.attrs.get("href")
if re.match(r"^/", link_url):
full_path = urljoin('http://localhost:5004/', link_url)
-
elif is_valid_link(link_url):
full_path = link_url
-
if full_path is not None:
if test_link(full_path):
BROKEN_LINKS.add(full_path)
@@ -92,8 +81,11 @@ def fetch_script_tags(parsed_page):
js_link = link.attrs.get("src")
if js_link is not None:
if is_valid_link(js_link):
- raise SystemExit("Failed,the library should be packaged in guix.\
- Please contact,http://genenetwork.org/ for more details")
+ raise SystemExit("Failed,the library should be "
+ "packaged in guix. "
+ "Please contact, "
+ "http://genenetwork.org/ "
+ "for more details")
elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link):
full_path = urljoin('http://localhost:5004/', js_link)
@@ -102,11 +94,9 @@ def fetch_script_tags(parsed_page):
def fetch_page_links(page_url):
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
html_page = uReq(page_url)
parsed_page = soup(html_page, "html.parser")
-
fetch_script_tags(parsed_page=parsed_page)
fetch_css_links(parsed_page=parsed_page)
fetch_html_links(parsed_page=parsed_page)
@@ -114,13 +104,10 @@ def fetch_page_links(page_url):
def webpages_to_check():
pages = [f"http://localhost:{PORT}/"]
-
return pages
if __name__ == '__main__':
- # results = search_templates()
-
for page in webpages_to_check():
fetch_page_links(page)
if len(BROKEN_LINKS) > 0:
@@ -130,4 +117,5 @@ if __name__ == '__main__':
if len(BROKEN_LINKS) > 0:
raise SystemExit(
- "The links Above are broken.Please contact genenetwork.org<<<<<<<<")
+ "The links Above are broken. "
+ "Please contact genenetwork.org<<<<<<<<")