author:    Alexander Kabui, 2021-02-11 23:11:45 +0300
committer: BonfaceKilz, 2021-02-13 16:55:08 +0300
commit:    b53a8362ba1d3031ece2deefdc3309823b932012 (patch)
tree:      6085038987a0bca25364649285cc3d448077e4a3 /test/requests/links_scraper
parent:    1c23f038db22d039f5a407db4a00e507123b8189 (diff)
download:  genenetwork2-b53a8362ba1d3031ece2deefdc3309823b932012.tar.gz
add search for templates
Diffstat (limited to 'test/requests/links_scraper')
-rw-r--r-- | test/requests/links_scraper/genelinks.py | 17 |
1 file changed, 17 insertions(+), 0 deletions(-)
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index ca98f62f..6a3d363e 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -11,10 +11,25 @@ from urllib.parse import urlparse
 
 
 PORT = os.environ.get("PORT", "5004")
+TEMPLATE_PATH = "../wqflask/wqflask/templates"
 
 BROKEN_LINKS = set()
 
 
+def search_templates():
+    """searches for broken links in templates"""
+    html_parsed_pages = []
+    for subdir, dirs, files in os.walk(TEMPLATE_PATH):
+        for file in files:
+            file_path = os.path.join(subdir, file)
+            if file_path.endswith(".html"):
+                parsed_page = soup(
+                    open(file_path, encoding="utf8"), "html.parser")
+                html_parsed_pages.append(parsed_page)
+
+    return html_parsed_pages
+
+
 def is_valid_link(url_link):
     try:
         result = urlparse(url_link)
@@ -107,6 +122,8 @@ def webpages_to_check():
 
 
 if __name__ == '__main__':
+    # results = search_templates()
+
     for page in webpages_to_check():
         fetch_page_links(page)
     if len(BROKEN_LINKS) > 0:
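Note that the new `search_templates()` only parses the template files; nothing in this commit consumes the parsed pages yet (the call in `__main__` is left commented out). Below is a minimal sketch of how the results could feed the module's existing broken-link bookkeeping. It assumes it sits in `genelinks.py` next to the helpers visible in the diff (`is_valid_link`, `BROKEN_LINKS`), relies on bs4's standard `find_all`, and the `check_template_links` name is hypothetical, not part of this commit.

```python
# Hypothetical follow-up (not in this commit): scan the pages returned by
# search_templates() for anchor targets and record those that fail the
# module's is_valid_link() check. Assumes this lives in genelinks.py, where
# is_valid_link() and BROKEN_LINKS are already defined.
def check_template_links(parsed_pages):
    """Collect href targets from parsed templates and record failing ones."""
    for page in parsed_pages:
        for anchor in page.find_all("a", href=True):
            url_link = anchor["href"]
            # Jinja2 expressions and bare fragments are not checkable URLs.
            if url_link.startswith(("{{", "{%", "#")):
                continue
            if not is_valid_link(url_link):
                BROKEN_LINKS.add(url_link)
```

Wired into `__main__`, this would replace the commented-out call with something like `check_template_links(search_templates())`.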