aboutsummaryrefslogtreecommitdiff
path: root/test/requests/link_checker.py
blob: 86222254e67a4bba6c3f5cd794394fd8d92d162c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import re
import requests
from lxml.html import document_fromstring
from requests.exceptions import ConnectionError

# When true, verify_link raises after reporting a failed link instead of
# only printing the error and carrying on.
DO_FAIL=False  # fail on error

def is_root_link(link):
    """Return a match object if *link* is the bare root path ``/``, else None."""
    return re.match("^/$", link)

def is_mailto_link(link):
    """Return a match object if *link* starts with ``mailto:``, else None."""
    return re.match("^mailto:.*", link)

def is_internal_link(link):
    """Return a match object if *link* starts with ``/``, else None."""
    return re.match("^/.*", link)

def is_in_page_link(link):
    """Return a match object if *link* starts with ``#``, else None."""
    return re.match("^#.*", link)

def get_links(doc):
    """Collect the ``href`` values of every ``<a>`` element in *doc*.

    Root links (``/``) and ``mailto:`` links are left out.  Anchors that
    carry no ``href`` attribute at all are skipped as well: their ``href``
    is ``None``, which the regex-based link predicates cannot match against.

    Args:
        doc: a parsed document exposing ``cssselect``.

    Returns:
        A list of href strings in document order.
    """
    hrefs = (anchor.get("href") for anchor in doc.cssselect("a"))
    return [
        href for href in hrefs
        if href is not None
        and not (is_root_link(href) or is_mailto_link(href))
    ]

def verify_link(link):
    """Request *link* and print whether it resolves.

    Empty links and in-page anchors (links starting with ``#``) are skipped
    without issuing a request.  A 200 response is reported as OK, a 307 as
    REDIRECT, and any other status as an error.

    Args:
        link: the absolute URL to check.

    Raises:
        Exception: when the status is neither 200 nor 307 and ``DO_FAIL``
            is set.
        requests.exceptions.RequestException: re-raised when the request
            itself fails and ``DO_FAIL`` is set.
    """
    if not link or link.startswith("#"):
        # nothing to fetch: empty href or local link on page
        return
    print("verifying "+link)
    try:
        result = requests.get(link, timeout=20, verify=False)
    except requests.exceptions.RequestException as ex:
        # RequestException covers connection errors as well as timeouts and
        # unsupported schemes, so one bad link cannot abort the whole run
        # unless DO_FAIL asks for that.
        print("ERROR: ", link, ex)
        if DO_FAIL:
            raise
        return

    if result.status_code == 200:
        print(link+" ==> OK")
    elif result.status_code == 307:
        print(link+" ==> REDIRECT")
    else:
        print("ERROR: link `"+link+"` failed with status "
              , result.status_code)

        if DO_FAIL:
            raise Exception("Failed verify")


def verify_static_file(link):
    """Request a static file and fail if it is missing.

    The file counts as present when the response status is 200 and the body
    does not contain the text ``Error: 404 Not Found`` anywhere, including
    at the very start of the body.

    Args:
        link: the absolute URL of the static file.

    Raises:
        Exception: when the file is not served correctly.  A connection
            error is only printed, not raised.
    """
    print("verifying "+link)
    try:
        result = requests.get(link, timeout=20, verify=False)
    except ConnectionError as ex:
        print("ERROR: ", link, ex)
        return

    # Membership test rather than ``find(...) <= 0``: find returns 0 when the
    # marker sits at the start of the body, which must count as "found".
    not_found_marker = b"Error: 404 Not Found"
    if result.status_code == 200 and not_found_marker not in result.content:
        print(link+" ==> OK")
    else:
        print("ERROR: link {}".format(link))
        raise Exception("Failed verify")

def check_page(host, start_url):
    """Fetch the page at *start_url* and verify the links it contains.

    Internal links (starting with ``/``) are prefixed with *host* before
    being verified; in-page anchors are ignored; every other link is
    verified as given.

    Args:
        host: base URL prepended to internal links.
        start_url: URL of the page whose links are checked.
    """
    print("")
    print("Checking links host "+host+" in page `"+start_url+"`")
    # Same timeout as the per-link requests so a stalled page cannot hang
    # the run.
    req = requests.get(start_url, timeout=20)
    # Parse the fetched page body, not the URL string itself.
    doc = document_fromstring(req.content)
    links = get_links(doc)
    internal_links = [x for x in links if is_internal_link(x)]
    external_links = [
        x for x in links
        if not (is_internal_link(x) or is_in_page_link(x))]

    for link in internal_links:
        verify_link(host+link)

    for link in external_links:
        verify_link(link)

def check_links(args_obj, parser):
    """Check the links on the home page and on one trait page.

    Args:
        args_obj: object whose ``host`` attribute is the base URL to check.
        parser: not used by this function.
    """
    print("")
    print("Checking links")
    site = args_obj.host
    trait_page = "/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P"

    # Home page first, then the traits page.
    check_page(site, site)
    check_page(site, site+trait_page)


def check_packaged_js_files(args_obj, parser):
    """Verify that each packaged static asset is served by the host.

    Every path below is appended to ``args_obj.host`` and passed to
    ``verify_static_file``.  Each asset is listed once so that no file is
    requested twice.

    Args:
        args_obj: object whose ``host`` attribute is the base URL to check.
        parser: not used by this function.
    """
    host = args_obj.host
    js_files = [
        # Datatables Extensions:
        "/css/DataTablesExtensions/buttonsBootstrap/css/buttons.bootstrap.css",
        "/js/DataTablesExtensions/buttons/js/dataTables.buttons.min.js",
        "/css/DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css",
        "/js/DataTablesExtensions/colResize/dataTables.colResize.js",
        "/js/DataTablesExtensions/colReorder/js/dataTables.colReorder.js",
        "/js/DataTablesExtensions/buttons/js/buttons.colVis.min.js",
        "/js/DataTablesExtensions/scroller/js/dataTables.scroller.min.js",
        "/js/DataTables/js/jquery.dataTables.js",
        "/js/DataTablesExtensions/scrollerStyle/css/scroller.dataTables.min.css",
        # Datatables plugins:
        "/js/DataTablesExtensions/plugins/sorting/natural.js",
        "/js/DataTablesExtensions/plugins/sorting/scientific.js",
        # Other js libraries
        "/js/chroma/chroma.min.js",
        "/js/d3-tip/d3-tip.js",
        "/js/d3js/d3.min.js",
        "/js/js_alt/underscore.min.js",
        "/js/nvd3/nv.d3.min.css",
        "/js/qtip2/jquery.qtip.min.js",
        "/js/js_alt/md5.min.js",
        "/js/bootstrap/js/bootstrap.min.js",
        "/css/bootstrap/css/bootstrap.css",
        "/js/jquery-ui/jquery-ui.min.js",
        "/js/jquery-cookie/jquery.cookie.js",
        "/js/jquery/jquery.min.js",
        "/js/typeahead/typeahead.bundle.js",
        "/js/underscore-string/underscore.string.min.js",
        "/js/js_alt/jstat.min.js",
        "/js/js_alt/parsley.min.js",
        "/js/js_alt/timeago.min.js",
        "/js/plotly/plotly.min.js",
        "/js/ckeditor/ckeditor.js",
        "/js/jszip/jszip.min.js",
        "/js/jscolor/jscolor.js",
        "/js/DataTables/js/jquery.js",
        "/css/DataTables/css/jquery.dataTables.css",
        "/js/colorbox/jquery.colorbox-min.js",
        "/css/nouislider/nouislider.min.css",
        "/js/nouislider/nouislider.js",
        "/js/purescript-genome-browser/js/purescript-genetics-browser.js",
        "/js/purescript-genome-browser/css/purescript-genetics-browser.css",
        "/js/cytoscape/cytoscape.min.js",
        "/js/cytoscape-panzoom/cytoscape-panzoom.js",
        "/js/cytoscape-panzoom/cytoscape.js-panzoom.css",
        "/js/cytoscape-qtip/cytoscape-qtip.js",
        "/css/d3-tip/d3-tip.css",
        "/js/zxcvbn/zxcvbn.js",
        "/js/javascript-twitter-post-fetcher/js/twitterFetcher_min.js",
        "/js/DataTables/images/sort_asc_disabled.png",
        "/js/DataTables/images/sort_desc_disabled.png",
        "/js/shapiro-wilk/shapiro-wilk.js",
    ]

    print("Checking links")
    for link in js_files:
        verify_static_file(host+link)