aboutsummaryrefslogtreecommitdiff
path: root/test/requests/links_scraper/genelinks.py
diff options
context:
space:
mode:
author: Alexander Kabui, 2021-02-11 23:11:45 +0300
committer: BonfaceKilz, 2021-02-13 16:55:08 +0300
commit: b53a8362ba1d3031ece2deefdc3309823b932012 (patch)
tree: 6085038987a0bca25364649285cc3d448077e4a3 /test/requests/links_scraper/genelinks.py
parent: 1c23f038db22d039f5a407db4a00e507123b8189 (diff)
download: genenetwork2-b53a8362ba1d3031ece2deefdc3309823b932012.tar.gz
add search for templates
Diffstat (limited to 'test/requests/links_scraper/genelinks.py')
-rw-r--r-- test/requests/links_scraper/genelinks.py 17
1 files changed, 17 insertions, 0 deletions
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index ca98f62f..6a3d363e 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -11,10 +11,25 @@ from urllib.parse import urlparse
PORT = os.environ.get("PORT", "5004")
+TEMPLATE_PATH = "../wqflask/wqflask/templates"
BROKEN_LINKS = set()
+def search_templates():
+ """searches for broken links in templates"""
+ html_parsed_pages = []
+ for subdir, dirs, files in os.walk(TEMPLATE_PATH):
+ for file in files:
+ file_path = os.path.join(subdir, file)
+ if file_path.endswith(".html"):
+ parsed_page = soup(
+ open(file_path, encoding="utf8"), "html.parser")
+ html_parsed_pages.append(parsed_page)
+
+ return html_parsed_pages
+
+
def is_valid_link(url_link):
try:
result = urlparse(url_link)
@@ -107,6 +122,8 @@ def webpages_to_check():
if __name__ == '__main__':
+ # results = search_templates()
+
for page in webpages_to_check():
fetch_page_links(page)
if len(BROKEN_LINKS) > 0: