diff options
-rw-r--r-- | test/requests/links_scraper/genelinks.py | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index ca98f62f..6a3d363e 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -11,10 +11,25 @@ from urllib.parse import urlparse
 
 
 PORT = os.environ.get("PORT", "5004")
+TEMPLATE_PATH = "../wqflask/wqflask/templates"
 
 BROKEN_LINKS = set()
 
 
+def search_templates():
+    """Parse every .html file under TEMPLATE_PATH and return the pages."""
+    html_parsed_pages = []
+    for subdir, _dirs, files in os.walk(TEMPLATE_PATH):
+        for file in files:
+            file_path = os.path.join(subdir, file)
+            if not file_path.endswith(".html"):
+                continue
+            # Parse inside `with` so each file handle is closed promptly.
+            with open(file_path, encoding="utf8") as template:
+                html_parsed_pages.append(soup(template, "html.parser"))
+    return html_parsed_pages
+
+
 def is_valid_link(url_link):
     try:
         result = urlparse(url_link)
@@ -107,6 +122,8 @@ def webpages_to_check():
 
 
 if __name__ == '__main__':
+    # results = search_templates()
+
     for page in webpages_to_check():
         fetch_page_links(page)
     if len(BROKEN_LINKS) > 0: