From 06986d1c3a9d76e9e4b0f0d2a7c89746a5a19990 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Tue, 9 Feb 2021 23:28:26 +0300
Subject: add check for broken links in genenetwork homepage

---
 test/requests/links_scraper/genelinks.py | 92 ++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 test/requests/links_scraper/genelinks.py

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
new file mode 100644
index 00000000..223a2c31
--- /dev/null
+++ b/test/requests/links_scraper/genelinks.py
@@ -0,0 +1,92 @@
+import re
+import requests
+import urllib3
+import os
+import logging
+
+from urllib.request import urlopen as uReq
+from bs4 import BeautifulSoup as soup
+from urllib.parse import urljoin
+
+
+PORT = os.environ.get("PORT", "5004")
+
+
+def test_link(link, strict=True):
+    print(f"link testing {link}")
+    results = None
+    try:
+
+        results = requests.get(link, verify=False, timeout=10)
+
+    except Exception as e:
+        if strict:
+            raise SystemExit(
+                "The link does not exists or is wrongly formatted")
+        else:
+            logging.error(f"FAILED:{link} does not exists or is wrongly formatted")
+
+    status_code = results.status_code if results is not None else "404"
+
+    print(f'the link {link} ---> {status_code}')
+
+
+def fetch_css_links(parsed_page):
+    print("fetching css links")
+    for link in parsed_page.findAll("link"):
+        full_path = None
+
+        link_url = link.attrs.get("href")
+        if re.match(r"^http://", link_url):
+            pass
+            # not sure whether to raise an error here for external css links
+
+        elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url):
+            full_path = urljoin('http://localhost:5004/', link_url)
+
+        if full_path is not None:
+            test_link(full_path)
+
+
+def fetch_html_links(parsed_page):
+    print("fetching a tags ")
+
+    for link in parsed_page.findAll("a"):
+        full_path = None
+        link_url = link.attrs.get("href")
+        if re.match(r"^/", link_url):
+            full_path = urljoin('http://localhost:5004/', link_url)
+
+        elif re.match(r'^http://', link_url):
+            full_path = link_url
+
+        if full_path is not None:
+            test_link(full_path)
+
+
+def fetch_script_tags(parsed_page):
+    print("--->fetching js links")
+    for link in parsed_page.findAll("script"):
+        js_link = link.attrs.get("src")
+        if js_link is not None:
+            if re.match(r'^http://', js_link):
+                raise SystemExit("Failed,the library should be packaged in guix.\
+                                Please contact,http://genenetwork.org/ for more details")
+
+            elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link):
+                full_path = urljoin('http://localhost:5004/', js_link)
+                test_link(full_path)
+
+
+def fetch_page_links(page_url):
+
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+    html_page = uReq(page_url)
+    parsed_page = soup(html_page, "html.parser")
+
+    fetch_script_tags(parsed_page=parsed_page)
+    fetch_css_links(parsed_page=parsed_page)
+    fetch_html_links(parsed_page=parsed_page)
+
+
+fetch_page_links(f"http://localhost:{PORT}/")
-- 
cgit v1.2.3


From cd5b32a69215bf5c168b7619ebc881908845204c Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 10 Feb 2021 08:11:33 +0300
Subject: add broken links checker to workflow

---
 .github/workflows/main.yml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 2342796a..79c69699 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -46,3 +46,22 @@ jobs:
         WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
         GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
         etc/default_settings.py -c -m unittest discover -v
+
+    - name: Start Genenetwork as a  Background Task
+      run: |
+        env GN2_PROFILE=/gn2-profile \
+        TMPDIR=/tmp SERVER_PORT=5004 \
+        WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
+        GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
+        etc/default_settings.py
+
+    - name: Test for Broken Links
+      run: |
+
+        env GN2_PROFILE=/gn2-profile \
+        TMPDIR=/tmp\
+        WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
+        GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
+        etc/default_settings.py -c -m\
+        /__w/genenetwork/genenetwork/test/requests/links_scraper/genelinks.py 
+
-- 
cgit v1.2.3


From 02ac2fd38fea7f85b3ef89464157a7c0d1ffcac7 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 10 Feb 2021 08:14:03 +0300
Subject: fix:starting genenetwork on the background

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 79c69699..a8642806 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -53,7 +53,7 @@ jobs:
         TMPDIR=/tmp SERVER_PORT=5004 \
         WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
         GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
-        etc/default_settings.py
+        etc/default_settings.py&
 
     - name: Test for Broken Links
       run: |
-- 
cgit v1.2.3


From e67e6d7f88ffb21e4101147c5abadb2b7c78e5ae Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 10 Feb 2021 08:18:47 +0300
Subject: fix error

---
 .github/workflows/main.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a8642806..d4649d4f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -62,6 +62,5 @@ jobs:
         TMPDIR=/tmp\
         WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
         GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
-        etc/default_settings.py -c -m\
-        /__w/genenetwork/genenetwork/test/requests/links_scraper/genelinks.py 
+        etc/default_settings.py -c -m  /__w/genenetwork/genenetwork/test/requests/links_scraper/genelinks.py 
 
-- 
cgit v1.2.3


From f3a8fdc660504e0ea74ae63d5ed7c891db6e3963 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 10 Feb 2021 08:20:18 +0300
Subject: fix paths issues

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d4649d4f..5d46ccb9 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -62,5 +62,5 @@ jobs:
         TMPDIR=/tmp\
         WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
         GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
-        etc/default_settings.py -c -m  /__w/genenetwork/genenetwork/test/requests/links_scraper/genelinks.py 
+        etc/default_settings.py -c -m  /__w/genenetwork2/genenetwork2/test/requests/links_scraper/genelinks.py 
 
-- 
cgit v1.2.3


From e1c3827d65a35d87e45cefe04135c1ff16374410 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 03:26:49 +0300
Subject: add links validator

---
 test/requests/links_scraper/genelinks.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 223a2c31..5dddcc47 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -7,11 +7,20 @@ import logging
 from urllib.request import urlopen as uReq
 from bs4 import BeautifulSoup as soup
 from urllib.parse import urljoin
+from urllib.parse import urlparse
 
 
 PORT = os.environ.get("PORT", "5004")
 
 
+def is_valid_link(url_link):
+    try:
+        result = urlparse(url_link)
+        return all([result.scheme, result.netloc, result.path])
+    except Exception as e:
+        return False
+
+
 def test_link(link, strict=True):
     print(f"link testing {link}")
     results = None
@@ -57,7 +66,8 @@ def fetch_html_links(parsed_page):
         if re.match(r"^/", link_url):
             full_path = urljoin('http://localhost:5004/', link_url)
 
-        elif re.match(r'^http://', link_url):
+        elif is_valid_link(link_url):
+            print(link_url)
             full_path = link_url
 
         if full_path is not None:
-- 
cgit v1.2.3


From 6f3273429482dea1aaaa4e5fe61b178485e271b8 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 07:26:04 +0300
Subject: edit workflow

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 5d46ccb9..a36abc0a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -62,5 +62,5 @@ jobs:
         TMPDIR=/tmp\
         WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
         GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
-        etc/default_settings.py -c -m  /__w/genenetwork2/genenetwork2/test/requests/links_scraper/genelinks.py 
+        etc/default_settings.py -c  /__w/genenetwork2/genenetwork2/test/requests/links_scraper/genelinks.py 
 
-- 
cgit v1.2.3


From 0cda88d6112ae0e4ae9ef5d4491a2d5695a07330 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 08:16:33 +0300
Subject: add check for url validity

---
 test/requests/links_scraper/genelinks.py | 61 ++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 5dddcc47..3b8ce230 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -12,6 +12,8 @@ from urllib.parse import urlparse
 
 PORT = os.environ.get("PORT", "5004")
 
+BROKEN_LINKS = set()
+
 
 def is_valid_link(url_link):
     try:
@@ -21,23 +23,21 @@ def is_valid_link(url_link):
         return False
 
 
-def test_link(link, strict=True):
-    print(f"link testing {link}")
+def test_link(link):
+    print(f'Checking -->{link}')
     results = None
     try:
 
         results = requests.get(link, verify=False, timeout=10)
+        status_code = results.status_code
 
     except Exception as e:
-        if strict:
-            raise SystemExit(
-                "The link does not exists or is wrongly formatted")
-        else:
-            logging.error(f"FAILED:{link} does not exists or is wrongly formatted")
+        status_code = 408
 
-    status_code = results.status_code if results is not None else "404"
+    if int(status_code) > 403:
+        return True
 
-    print(f'the link {link} ---> {status_code}')
+    return False
 
 
 def fetch_css_links(parsed_page):
@@ -46,15 +46,15 @@ def fetch_css_links(parsed_page):
         full_path = None
 
         link_url = link.attrs.get("href")
-        if re.match(r"^http://", link_url):
-            pass
-            # not sure whether to raise an error here for external css links
+        if is_valid_link(link_url):
+            full_path = link_url
 
         elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url):
             full_path = urljoin('http://localhost:5004/', link_url)
 
         if full_path is not None:
-            test_link(full_path)
+            if test_link(full_path):
+                BROKEN_LINKS.add(full_path)
 
 
 def fetch_html_links(parsed_page):
@@ -67,11 +67,11 @@ def fetch_html_links(parsed_page):
             full_path = urljoin('http://localhost:5004/', link_url)
 
         elif is_valid_link(link_url):
-            print(link_url)
             full_path = link_url
 
         if full_path is not None:
-            test_link(full_path)
+            if test_link(full_path):
+                BROKEN_LINKS.add(full_path)
 
 
 def fetch_script_tags(parsed_page):
@@ -79,13 +79,14 @@ def fetch_script_tags(parsed_page):
     for link in parsed_page.findAll("script"):
         js_link = link.attrs.get("src")
         if js_link is not None:
-            if re.match(r'^http://', js_link):
+            if is_valid_link(js_link):
                 raise SystemExit("Failed,the library should be packaged in guix.\
                                 Please contact,http://genenetwork.org/ for more details")
 
             elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link):
                 full_path = urljoin('http://localhost:5004/', js_link)
-                test_link(full_path)
+                if test_link(full_path):
+                    BROKEN_LINKS.add(full_path)
 
 
 def fetch_page_links(page_url):
@@ -99,4 +100,28 @@ def fetch_page_links(page_url):
     fetch_html_links(parsed_page=parsed_page)
 
 
-fetch_page_links(f"http://localhost:{PORT}/")
+def webpages_to_check():
+    pages = [
+
+        "http://localhost:/5004",
+
+
+
+
+
+
+    ]
+
+    return pages
+
+
+if __name__ == '__main__':
+    for page in webpages_to_check():
+        fetch_page_links(f"http://localhost:{PORT}/")
+        if BROKEN_LINKS is not None:
+            print("THE LINKS BELOW ARE BROKEN>>>>>>>>>>>>>")
+            for link in BROKEN_LINKS:
+                print(link)
+
+            raise SystemExit(
+                "The links Above are broken.Please contact genenetwork.org<<<<<<<<")
-- 
cgit v1.2.3


From 5f4ad48a7afaca3cf34266c1012efedb7ef2ec46 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 08:43:37 +0300
Subject: pep8 formatting

---
 test/requests/links_scraper/genelinks.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 3b8ce230..00a71d57 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -101,23 +101,14 @@ def fetch_page_links(page_url):
 
 
 def webpages_to_check():
-    pages = [
-
-        "http://localhost:/5004",
-
-
-
-
-
-
-    ]
+    pages = [f"http://localhost:{PORT}/"]
 
     return pages
 
 
 if __name__ == '__main__':
     for page in webpages_to_check():
-        fetch_page_links(f"http://localhost:{PORT}/")
+        fetch_page_links(page)
         if BROKEN_LINKS is not None:
             print("THE LINKS BELOW ARE BROKEN>>>>>>>>>>>>>")
             for link in BROKEN_LINKS:
-- 
cgit v1.2.3


From a4e057917a42b073bcbe70d9ba80dd48ee56b618 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 21:47:41 +0300
Subject: add styles for broken links

---
 wqflask/wqflask/static/new/css/broken_links.css | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 wqflask/wqflask/static/new/css/broken_links.css

diff --git a/wqflask/wqflask/static/new/css/broken_links.css b/wqflask/wqflask/static/new/css/broken_links.css
new file mode 100644
index 00000000..676f32d9
--- /dev/null
+++ b/wqflask/wqflask/static/new/css/broken_links.css
@@ -0,0 +1,5 @@
+
+.broken_link{
+	color:red;
+	text-decoration: underline;
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 03cdfbe9f59414cd5c1d44a7be8c69a41c469930 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 21:48:12 +0300
Subject: replace broken links with text

---
 wqflask/wqflask/templates/base.html            | 10 ++++++----
 wqflask/wqflask/templates/index_page_orig.html |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html
index ec500d1e..ccb2ac5a 100644
--- a/wqflask/wqflask/templates/base.html
+++ b/wqflask/wqflask/templates/base.html
@@ -21,6 +21,8 @@
     <link rel="stylesheet" type="text/css" href="/static/new/css/colorbox.css"  />
     <!--<link rel="stylesheet" type="text/css" href="/static/new/css/main.css"  />-->
     <link rel="stylesheet" type="text/css" href="/static/new/css/parsley.css"  />
+    <link rel="stylesheet" type="text/css" href="/static/new/css/broken_links.css"  />
+
 
     {% block css %}
     {% endblock %}
@@ -80,9 +82,9 @@
                                 <a href="/help" class="dropdow-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Tools <span class="caret"></a>
                                 <ul class="dropdown-menu">
                                   <li><a href="/snp_browser">Variant Browser</a></li>
-                                  <li><a href="http://bnw.genenetwork.org/BNW/sourcecodes/home.php">Bayesian Network Webserver</a></li>
+                                  <li><a href="http://bnw.genenetwork.org/sourcecodes/home.php">Bayesian Network Webserver</a></li>
                                   <li><a href="https://systems-genetics.org/">Systems Genetics PheWAS</a></li>
-                                  <li><a href="http://ucscbrowser.genenetwork.org/">Genome Browser</a></li>
+                                  <li><span class="broken_link" href="http://ucscbrowser.genenetwork.org/">Genome Browser</span></li>
                                   <li><a href="http://power.genenetwork.org">BXD Power Calculator</a></li>
                                   <li><a href="http://datafiles.genenetwork.org">Interplanetary File System</a></li>
                                 </ul>
@@ -197,7 +199,7 @@
                     (P20-DA 21131, 2001-2012)
                 </li>
                 <li>
-                    NCI <a href="http://emice.nci.nih.gov/">MMHCC</a> (U01CA105417), NCRR, <a href="http://www.birncommunity.org/">BIRN</a>, (U24 RR021760)
+                    NCI <a href="http://emice.nci.nih.gov/">MMHCC</a> (U01CA105417), NCRR, <span class="broken_link" href="http://www.birncommunity.org/">BIRN</span>, (U24 RR021760)
                 </li>
             </UL>
             <!--</p>-->
@@ -205,7 +207,7 @@
                     <a href="http://joss.theoj.org/papers/10.21105/joss.00025"><img src="https://camo.githubusercontent.com/846b750f582ae8f1d0b4f7e8fee78bed705c88ba/687474703a2f2f6a6f73732e7468656f6a2e6f72672f7061706572732f31302e32313130352f6a6f73732e30303032352f7374617475732e737667" alt="JOSS" data-canonical-src="http://joss.theoj.org/papers/10.21105/joss.00025/status.svg" style="max-width:100%;"></a>
             </p>
             <p>
-            Development and source code on <a href="https://github.com/genenetwork/">github</a> with <a href="https://github.com/genenetwork/genenetwork2/issues">issue tracker</a> and <a href="https://github.com/genenetwork/genenetwork2/blob/master/README.md">documentation</a>. Join the <a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</a> and find us on <a href="https://webchat.freenode.net/">IRC</a> (#genenetwork channel).
+            Development and source code on <a href="https://github.com/genenetwork/">github</a> with <a href="https://github.com/genenetwork/genenetwork2/issues">issue tracker</a> and <a href="https://github.com/genenetwork/genenetwork2/blob/master/README.md">documentation</a>. Join the <span class="broken_link" href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">mailing list</span> and find us on <a href="https://webchat.freenode.net/">IRC</a> (#genenetwork channel).
             {% if version: %}
             <p><small>GeneNetwork {{ version }}</small></p>
             {% endif %}
diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html
index 16caa30b..7f82b35c 100755
--- a/wqflask/wqflask/templates/index_page_orig.html
+++ b/wqflask/wqflask/templates/index_page_orig.html
@@ -193,7 +193,7 @@
                         <h1>Affiliates</h1>
                         <ul>
                           <li><b><a href="http://gn1.genenetwork.org">GeneNetwork 1</a> at UTHSC</b></li>
-                          <li><a href="http://ucscbrowser.genenetwork.org/">Genome Browser</a> at UTHSC</li>
+                          <li><span class="broken_link" href="http://ucscbrowser.genenetwork.org/">Genome Browser</span> at UTHSC</li>
                           <li><a href="https://systems-genetics.org/">Systems Genetics</a> at EPFL</li>
                           <li><a href="http://bnw.genenetwork.org/">Bayesian Network Web Server</a> at UTHSC</li>
                           <li><a href="https://www.geneweaver.org/">GeneWeaver</a></li>
@@ -263,7 +263,7 @@
                     <h3>GeneNetwork v1:</h3>
                     <ul>
                         <li><a href="http://gn1.genenetwork.org/">Main website</a> at UTHSC</li>
-                        <li><a href="http://artemis.uthsc.edu/">Time Machine</a>: Full GN versions from 2009 to 2016 (mm9)</li>
+                        <li><span class="broken_link" href="http://artemis.uthsc.edu/">Time Machine</span>: Full GN versions from 2009 to 2016 (mm9)</li>
                             Cloud (EC2)</a></li>
                     </ul>
                     <script type="text/javascript" src="//rf.revolvermaps.com/0/0/8.js?i=526mdlpknyd&amp;m=0&amp;c=ff0000&amp;cr1=ffffff&amp;f=arial&amp;l=33" async="async"></script>
-- 
cgit v1.2.3


From f46991d6751efaac1687c12a74a92d913d61ac54 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 22:32:56 +0300
Subject: modify code for link_checker

---
 test/requests/links_scraper/genelinks.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 00a71d57..ca98f62f 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -109,10 +109,11 @@ def webpages_to_check():
 if __name__ == '__main__':
     for page in webpages_to_check():
         fetch_page_links(page)
-        if BROKEN_LINKS is not None:
+        if len(BROKEN_LINKS) > 0:
             print("THE LINKS BELOW ARE BROKEN>>>>>>>>>>>>>")
             for link in BROKEN_LINKS:
                 print(link)
 
-            raise SystemExit(
-                "The links Above are broken.Please contact genenetwork.org<<<<<<<<")
+    if len(BROKEN_LINKS) > 0:
+        raise SystemExit(
+            "The links Above are broken.Please contact genenetwork.org<<<<<<<<")
-- 
cgit v1.2.3


From 1c23f038db22d039f5a407db4a00e507123b8189 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 22:33:47 +0300
Subject: fix:broken-links

---
 wqflask/wqflask/templates/credits.html                 |  4 ++--
 wqflask/wqflask/templates/data_sharing.html            | 18 +++++++++---------
 wqflask/wqflask/templates/index_page.html              |  9 ++++-----
 .../templates/show_trait_calculate_correlations.html   |  4 ++--
 .../wqflask/templates/show_trait_mapping_tools.html    |  2 +-
 5 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/wqflask/wqflask/templates/credits.html b/wqflask/wqflask/templates/credits.html
index 95c424cc..bcb37c48 100644
--- a/wqflask/wqflask/templates/credits.html
+++ b/wqflask/wqflask/templates/credits.html
@@ -31,8 +31,8 @@
 	 <UL>
   <LI><A HREF="http://www.nervenet.org/people/lulu_cv.html">Lu Lu</A>
 	<LI> <A HREF="http://www.jax.org/news/archives/2009/chesler.html">Elissa J. Chesler</A>
-	<LI><a href="http://www.ohsu.edu/som-BehNeuro/Faculty/Crabbe.html">John C Crabbe</a>, OHSU
-	<LI><a href="http://www.ohsu.edu/som-BehNeuro/Faculty/Belknap.html">John K Belknap</a>, OHSU
+	<LI><span class="broken_link" href="http://www.ohsu.edu/som-BehNeuro/Faculty/Crabbe.html">John C Crabbe</span>, OHSU
+	<LI><span class="broken_link" href="http://www.ohsu.edu/som-BehNeuro/Faculty/Belknap.html">John K Belknap</span>, OHSU
 	<LI>Mary-Kathleen Sullivan
   <LI>Emily English
 	<LI>Byron Jones 
diff --git a/wqflask/wqflask/templates/data_sharing.html b/wqflask/wqflask/templates/data_sharing.html
index 366e2075..cca498ec 100644
--- a/wqflask/wqflask/templates/data_sharing.html
+++ b/wqflask/wqflask/templates/data_sharing.html
@@ -82,9 +82,9 @@
 <P>The entire procedure can be reapplied once the initial outlier data sets have been eliminated to detect any remaining outlier data sets.
 
 
-<P><A HREF="http://www.datadesk.com/products/data_analysis/datadesk/" target="_empty" class="normalsize">DataDesk</A> was used to examine the statistical quality of the probe level (CEL) data after step 5 below. DataDesk allows the rapid detection of subsets of probes that are particularly sensitive to still unknown factors in array processing. Arrays can then be categorized at the probe level into "reaction classes." A reaction class is a group of arrays for which the expression of essentially all probes are colinear over the full range of log2 values. A single but large group of arrays (n = 32) processed in essentially the identical manner by a single operator can produce arrays belonging to as many as four different reaction classes. Reaction classes are NOT related to strain, age, sex, treatment, or any known biological parameter (technical replicates can belong to different reaction classes). We do not yet understand the technical origins of reaction classes. The number of probes that contribute to the definition of reaction classes is quite small (<10% of all probes). We have categorized all arrays in this data set into one of 5 reaction classes. These have then been treated as if they were separate batches. Probes in these data type "batches" have been aligned to a common mean as described below.
+<P><span HREF="http://www.datadesk.com/products/data_analysis/datadesk/" target="_empty" class="broken_link normalsize">DataDesk</span> was used to examine the statistical quality of the probe level (CEL) data after step 5 below. DataDesk allows the rapid detection of subsets of probes that are particularly sensitive to still unknown factors in array processing. Arrays can then be categorized at the probe level into "reaction classes." A reaction class is a group of arrays for which the expression of essentially all probes are colinear over the full range of log2 values. A single but large group of arrays (n = 32) processed in essentially the identical manner by a single operator can produce arrays belonging to as many as four different reaction classes. Reaction classes are NOT related to strain, age, sex, treatment, or any known biological parameter (technical replicates can belong to different reaction classes). We do not yet understand the technical origins of reaction classes. The number of probes that contribute to the definition of reaction classes is quite small (<10% of all probes). We have categorized all arrays in this data set into one of 5 reaction classes. These have then been treated as if they were separate batches. Probes in these data type "batches" have been aligned to a common mean as described below.
 
-<P><B>Probe (cell) level data from the CEL file: </B>These CEL values produced by <a href="http://www.affymetrix.com/support/technical/product_updates/gcos_download.affx" target="_blank" class="normalsize">GCOS</a> are 75% quantiles from a set of 91 pixel values per cell.
+<P><B>Probe (cell) level data from the CEL file: </B>These CEL values produced by <span href="http://www.affymetrix.com/support/technical/product_updates/gcos_download.affx" target="_blank" class="normalsize broken_link">GCOS</span> are 75% quantiles from a set of 91 pixel values per cell.
 <OL>
 
 <LI>We added an offset of 1.0 unit to each cell signal to ensure that all values could be logged without generating negative values. We then computed the log base 2 of each cell.
@@ -122,7 +122,7 @@
 <LI>Lu Lu, M.D.  <!--Tissue acquisition, RNA processing, experimental design-->
 <BR>Grant Support: NIH U01AA13499, U24AA13513
 
-<LI><A HREF="http://www.salk.edu/faculty/faculty/details.php?id=23" target="_empty" class="normalsize">Fred H. Gage, Ph.D.</A>  <!--$10,000 contribution -->
+<LI><span HREF="http://www.salk.edu/faculty/faculty/details.php?id=23" target="_empty" class="broken_link normalsize">Fred H. Gage, Ph.D.</span>  <!--$10,000 contribution -->
 <BR>Grant Support: Lookout Foundation
 
 <LI>Dan Goldowitz, Ph.D. <!--$30,000 contribution -->
@@ -134,7 +134,7 @@
 <LI>Shirlean Goodwin, Ph.D.  <!--All array processing-->
 <BR>Grant Support: NIAAA INIA U01AA013515
 
-<LI><A HREF="http://www.bccn-berlin.de/ResearchGroups/Kempermann" target="_empty" class="normalsize">Gerd Kempermann, M.D.</A> <!--$30,000 contribution -->
+<LI><span HREF="http://www.bccn-berlin.de/ResearchGroups/Kempermann"  class="broken_link normalsize">Gerd Kempermann, M.D.</span> <!--$30,000 contribution -->
 <BR>Grant Support: The <A HREF="http://www.volkswagen-stiftung.de/" target="_empty" class="normalsize">Volkswagen Foundation</A> Grant on Permissive and Persistent Factors in Neurogenesis in the Adult Central Nervous System
 <BR>Humboldt-Universitat Berlin
 <BR>Universitatsklinikum Charite
@@ -174,10 +174,10 @@
 </UL>
 </P><br><br></td></tr>
 <tr><td><span style="font-size:115%;font-weight:bold;">Experiment Type:</span></td></tr>
-	<tr><td> <P>Pooled RNA samples (usually one pool of male hippocampii and one pool of female hippocampii) were prepared using standard protocols. Samples were processed using a total of 206 Affymetrix GeneChip Mouse Expression 430 2.0 short oligomer arrays (MOE430 2.0 or M430v2; see GEO platform ID <A HREF="http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GPL1261" target="_empty" class="normalsize">GPL1261</A>), of which 201 passed quality control and error checking. This particular data set was processed using the <a href="http://odin.mdacc.tmc.edu/~zhangli/PerfectMatch/" target="_blank" class="normalsize">PDNN</a> protocol. To simplify comparisons among transforms, PDNN values of each array were adjusted to an average of 8 units and a standard deviation of 2 units.
+	<tr><td> <P>Pooled RNA samples (usually one pool of male hippocampii and one pool of female hippocampii) were prepared using standard protocols. Samples were processed using a total of 206 Affymetrix GeneChip Mouse Expression 430 2.0 short oligomer arrays (MOE430 2.0 or M430v2; see GEO platform ID <A HREF="http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GPL1261" target="_empty" class="normalsize">GPL1261</A>), of which 201 passed quality control and error checking. This particular data set was processed using the <span href="http://odin.mdacc.tmc.edu/~zhangli/PerfectMatch/" target="_blank" class="broken_link normalsize">PDNN</span> protocol. To simplify comparisons among transforms, PDNN values of each array were adjusted to an average of 8 units and a standard deviation of 2 units.
 <br><br></td></tr>
 <tr><td><span style="font-size:115%;font-weight:bold;">Overall Design:</span></td></tr>
-	<tr><td> <P>Pooled RNA samples (usually one pool of male hippocampii and one pool of female hippocampii) were prepared using standard protocols. Samples were processed using a total of 206 Affymetrix GeneChip Mouse Expression 430 2.0 short oligomer arrays (MOE430 2.0 or M430v2; see GEO platform ID <A HREF="http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GPL1261" target="_empty" class="normalsize">GPL1261</A>), of which 201 passed quality control and error checking. This particular data set was processed using the <a href="http://odin.mdacc.tmc.edu/~zhangli/PerfectMatch/" target="_blank" class="normalsize">PDNN</a> protocol. To simplify comparisons among transforms, PDNN values of each array were adjusted to an average of 8 units and a standard deviation of 2 units.
+	<tr><td> <P>Pooled RNA samples (usually one pool of male hippocampii and one pool of female hippocampii) were prepared using standard protocols. Samples were processed using a total of 206 Affymetrix GeneChip Mouse Expression 430 2.0 short oligomer arrays (MOE430 2.0 or M430v2; see GEO platform ID <A HREF="http://www.ncbi.nlm.nih.gov/projects/geo/query/acc.cgi?acc=GPL1261" target="_empty" class="normalsize">GPL1261</A>), of which 201 passed quality control and error checking. This particular data set was processed using the <span href="http://odin.mdacc.tmc.edu/~zhangli/PerfectMatch/" target="_blank" class="broken_link normalsize">PDNN</span> protocol. To simplify comparisons among transforms, PDNN values of each array were adjusted to an average of 8 units and a standard deviation of 2 units.
 <br><br></td></tr>
 <tr><td><span style="font-size:115%;font-weight:bold;">Contributor:</span></td></tr>
 	<tr><td> <UL>
@@ -189,7 +189,7 @@
 <LI>Lu Lu, M.D.  <!--Tissue acquisition, RNA processing, experimental design-->
 <BR>Grant Support: NIH U01AA13499, U24AA13513
 
-<LI><A HREF="http://www.salk.edu/faculty/faculty/details.php?id=23" target="_empty" class="normalsize">Fred H. Gage, Ph.D.</A>  <!--$10,000 contribution -->
+<LI><span HREF="http://www.salk.edu/faculty/faculty/details.php?id=23" target="_empty" class="broken_link normalsize">Fred H. Gage, Ph.D.</span>  <!--$10,000 contribution -->
 <BR>Grant Support: Lookout Foundation
 
 <LI>Dan Goldowitz, Ph.D. <!--$30,000 contribution -->
@@ -201,7 +201,7 @@
 <LI>Shirlean Goodwin, Ph.D.  <!--All array processing-->
 <BR>Grant Support: NIAAA INIA U01AA013515
 
-<LI><A HREF="http://www.bccn-berlin.de/ResearchGroups/Kempermann" target="_empty" class="normalsize">Gerd Kempermann, M.D.</A> <!--$30,000 contribution -->
+<LI><span HREF="http://www.bccn-berlin.de/ResearchGroups/Kempermann"  class="broken_link normalsize">Gerd Kempermann, M.D.</span> <!--$30,000 contribution -->
 <BR>Grant Support: The <A HREF="http://www.volkswagen-stiftung.de/" target="_empty" class="normalsize">Volkswagen Foundation</A> Grant on Permissive and Persistent Factors in Neurogenesis in the Adult Central Nervous System
 <BR>Humboldt-Universitat Berlin
 <BR>Universitatsklinikum Charite
@@ -241,7 +241,7 @@
 </UL><br><br></td></tr>
 <tr><td><span style="font-size:115%;font-weight:bold;">Citation:</span></td></tr>
 	<tr><td>
-<P>Please cite: Overall RW, Kempermann G, Peirce J, Lu L, Goldowitz D, Gage FH, Goodwin S, Smit AB, Airey DC, Rosen GD, Schalkwyk LC, Sutter TR, Nowakowski RS, Whatley S, Williams RW (<a href="http://frontiersin.org/neurogenomics/paper/pending/0/815/"  target="_blank" class="normalsize">2009</a>) Genetics of the hippocampal transcriptome in mice: a systematic survey and online neurogenomic resource. Front. Neurogen. 1:3   <A href="http://frontiersin.org/neurogenomics/paper/pending/0/815/" target="_blank" class="smallsize"><I>Full Text HTML</I></A>  doi:10.3389/neuro.15.003.2009
+<P>Please cite: Overall RW, Kempermann G, Peirce J, Lu L, Goldowitz D, Gage FH, Goodwin S, Smit AB, Airey DC, Rosen GD, Schalkwyk LC, Sutter TR, Nowakowski RS, Whatley S, Williams RW (<span href="http://frontiersin.org/neurogenomics/paper/pending/0/815/"  target="_blank" class="broken_link normalsize">2009</span>) Genetics of the hippocampal transcriptome in mice: a systematic survey and online neurogenomic resource. Front. Neurogen. 1:3   <span href="http://frontiersin.org/neurogenomics/paper/pending/0/815/" target="_blank" class="broken_link smallsize"><I>Full Text HTML</I></span>  doi:10.3389/neuro.15.003.2009
 
 <br><br></td></tr>
 <tr><td><span style="font-size:115%;font-weight:bold;">Submission Date:</span></td></tr>
diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html
index 12c28e72..31846f87 100644
--- a/wqflask/wqflask/templates/index_page.html
+++ b/wqflask/wqflask/templates/index_page.html
@@ -203,14 +203,13 @@
                     </div>
                     <h3>Websites affiliated with GeneNetwork</h3>
                     <ul>
-                        <li><a href="http://ucscbrowser.genenetwork.org/">Genome
-                        browser</a> at UTHSC</li>
+                        <li><span class="broken_link" href="http://ucscbrowser.genenetwork.org/">Genome Browser</span> at UTHSC</li>
 
                         <li><a href="http://galaxy.genenetwork.org/">Galaxy</a> at
                         UTHSC</li>
 
-                        <li>GeneNetwork 1 at <a href="http://ec2.genenetwork.org/">Amazon
-                        Cloud (EC2)</a></li>
+                        <li>GeneNetwork 1 at <span class="broken_link" href="http://ec2.genenetwork.org/">Amazon
+                        Cloud (EC2)</span></li>
 
                         <li>GeneNetwork 1 Source Code at <a href="http://sourceforge.net/projects/genenetwork/">SourceForge</a></li>
 
@@ -220,7 +219,7 @@
 
                     <ul>
                         <li><a href="http://gn1.genenetwork.org/">Main GN1 site at UTHSC</a> (main site)</li>
-                        <li><a href="http://genenetwork.helmholtz-hzi.de/">Germany at the HZI</a></li>
+                        <li><span  class="broken_link" href="http://genenetwork.helmholtz-hzi.de/">Germany at the HZI</span></li>
                         <li><a href="http://genenetwork.org/">Memphis at the U of M</a></li>
                     </ul>
                 </section>
diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index cba977ac..50803978 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -135,8 +135,8 @@
             The <a href="http://genenetwork.org/correlationAnnotation.html#literatureCorr">Literature Correlation</a>
             (Lit r) between
             this gene and all other genes is computed<br>
-            using the <a href="https://grits.eecs.utk.edu/sgo/sgo.html">
-            Semantic Gene Organizer</a>
+            using the <span class="broken_link" href="https://grits.eecs.utk.edu/sgo/sgo.html">
+            Semantic Gene Organizer</span>
             and human, rat, and mouse data from PubMed.
             Values are ranked by Lit r, but Sample r and Tissue r are also displayed.<br>
             <a href="http://genenetwork.org/glossary.html#Literature">More on using Lit r</a>
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index b61a93ba..c3575454 100755
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -346,7 +346,7 @@
                 <dd>Maps traits with correction for kinship among samples using a linear mixed model method, and also allows users to fit multiple covariates such as sex, age, treatment, and genetic markers (<a href="https://www.ncbi.nlm.nih.gov/pubmed/24531419">PMID: 2453419</a>, and <a href="https://github.com/genetics-statistics/GEMMA"> GitHub code</a>). GEMMA incorporates the Leave One Chromosome Out (LOCO) method to ensure that the correction for kinship does not remove useful genetic variance near each marker. Markers can be filtered to include only those with minor allele frequencies (MAF) above a threshold. The default MAF is 0.05.</dd>
                 {% elif mapping_method == "R/qtl" %}
                 <dt class="map-method-text">R/qtl (version 1.44.9</dt>
-                <dd>The original R/qtl mapping package that supports classic experimental crosses including 4-parent F2 intercrosses (e.g., NIA ITP UM-HET3). R/qtl is ideal for populations that do not have complex kinship or admixture (<a href="https://www.ncbi.nlm.nih.gov/pubmed/12724300">PMID: 12724300</a>). Both R/qtl as implemented here, and R/qtl2 (<a href="https://www.ncbi.nlm.nih.gov/pubmed/30591514">PMID: 30591514</a>) are available as <a href="https://kbroman.org/pages/software.html">R suites</a>.</dd>
+                <dd>The original R/qtl mapping package that supports classic experimental crosses including 4-parent F2 intercrosses (e.g., NIA ITP UM-HET3). R/qtl is ideal for populations that do not have complex kinship or admixture (<a href="https://www.ncbi.nlm.nih.gov/pubmed/12724300">PMID: 12724300</a>). Both R/qtl as implemented here, and R/qtl2 (<a href="https://www.ncbi.nlm.nih.gov/pubmed/30591514">PMID: 30591514</a>) are available as <span  class="broken_link" href="https://kbroman.org/pages/software.html">R suites</span>.</dd>
                 {% elif mapping_method == "QTLReaper" %}
                 <dt class="map-method-text">Haley-Knott Regression</dt>
                 <dd>Fast linear mapping method (<a href="https://www.ncbi.nlm.nih.gov/pubmed/16718932">PMID 16718932</a>) works well with F2 intercrosses and backcrosses, but that is not recommended for complex or admixed populations (e.g., GWAS or heterogeneous stock studies) or for advanced intercrosses, recombinant inbred families, or diallel crosses. Interactive plots in GeneNetwork have relied on the fast HK mapping for two decades and we still use this method for mapping omics data sets and computing genome-wide permutation threshold (<a href="https://github.com/pjotrp/QTLReaper">QTL Reaper code</a>).</dd>
-- 
cgit v1.2.3


From b53a8362ba1d3031ece2deefdc3309823b932012 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 23:11:45 +0300
Subject: add search for templates

---
 test/requests/links_scraper/genelinks.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index ca98f62f..6a3d363e 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -11,10 +11,25 @@ from urllib.parse import urlparse
 
 
 PORT = os.environ.get("PORT", "5004")
+TEMPLATE_PATH = "../wqflask/wqflask/templates"
 
 BROKEN_LINKS = set()
 
 
+def search_templates():
+    """Return one parsed BeautifulSoup page per .html file found under TEMPLATE_PATH."""
+    html_parsed_pages = []
+    for subdir, _dirs, files in os.walk(TEMPLATE_PATH):
+        for file in files:
+            file_path = os.path.join(subdir, file)
+            if file_path.endswith(".html"):
+                # Context manager closes the handle; a bare open() leaked one per template.
+                with open(file_path, encoding="utf8") as template:
+                    html_parsed_pages.append(soup(template, "html.parser"))
+
+    return html_parsed_pages
+
+
 def is_valid_link(url_link):
     try:
         result = urlparse(url_link)
@@ -107,6 +122,8 @@ def webpages_to_check():
 
 
 if __name__ == '__main__':
+    # results = search_templates()
+
     for page in webpages_to_check():
         fetch_page_links(page)
         if len(BROKEN_LINKS) > 0:
-- 
cgit v1.2.3


From 187415f223b101f8c0b0ac100b2cf8e19c0ad3a5 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 13 Feb 2021 08:56:36 +0300
Subject: refactor to use idiomatic python

---
 test/requests/links_scraper/genelinks.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 6a3d363e..12300f4a 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -49,10 +49,7 @@ def test_link(link):
     except Exception as e:
         status_code = 408
 
-    if int(status_code) > 403:
-        return True
-
-    return False
+    return int(status_code) > 403
 
 
 def fetch_css_links(parsed_page):
-- 
cgit v1.2.3