1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
import re
import requests
from lxml.html import parse
from requests.exceptions import ConnectionError
# When True, verify_link raises on a failed link (bad status or connection
# error) instead of only printing the error and carrying on.
DO_FAIL=False
def is_root_link(link):
    """Match `link` against the site-root pattern.

    Returns the regex match object when `link` is the bare root path "/",
    otherwise None, so the result can be used directly as a truth value.
    """
    return re.match("^/$", link)
def is_mailto_link(link):
    """Match `link` against the `mailto:` prefix pattern.

    Returns the regex match object when `link` starts with "mailto:",
    otherwise None. The comparison is case-sensitive.
    """
    return re.match("^mailto:.*", link)
def is_internal_link(link):
    """Match `link` against the leading-slash pattern.

    Returns the regex match object when `link` starts with "/" (a path on
    the same host), otherwise None.
    """
    return re.match("^/.*", link)
def is_in_page_link(link):
    """Match `link` against the fragment-only pattern.

    Returns the regex match object when `link` starts with "#" (an anchor
    inside the current page), otherwise None.
    """
    return re.match("^#.*", link)
def get_links(doc):
    """Collect the `href` targets of every `<a>` element in `doc`.

    doc: a parsed HTML element offering `cssselect`.

    Returns a list of href strings, leaving out:
      * anchors with a missing or empty `href` (there is nothing to verify,
        and a None value would make the regex predicates raise TypeError),
      * the bare root link "/",
      * `mailto:` links.
    """
    hrefs = (anchor.get("href") for anchor in doc.cssselect("a"))
    return [
        href for href in hrefs
        if href and not (is_root_link(href) or is_mailto_link(href))
    ]
def verify_link(link):
    """Request `link` and print whether it resolved.

    link: the URL to fetch. An empty string or a "#fragment" link is
        skipped silently, since it points into the current page.

    Prints "==> OK" for status 200, "==> REDIRECT" for status 307 and an
    error line for any other status or for a failed request.

    Raises:
        Exception: when DO_FAIL is set and the status is neither 200 nor 307.
        requests.exceptions.RequestException: re-raised when DO_FAIL is set
            and the request itself failed (connection error, timeout,
            invalid URL, ...).
    """
    # startswith() is safe on "", unlike indexing link[0].
    if not link or link.startswith("#"):
        return
    print("verifying "+link)
    try:
        # NOTE(review): verify=False turns off TLS certificate validation -
        # confirm this is intended for the hosts being checked.
        result = requests.get(link, timeout=20, verify=False)
    except requests.exceptions.RequestException as ex:
        # RequestException also covers timeouts and malformed URLs, so one
        # bad link cannot abort the whole run unless DO_FAIL asks for it.
        print("ERROR: ", link, ex)
        if DO_FAIL:
            raise
        return
    if result.status_code == 200:
        print(link+" ==> OK")
    elif result.status_code == 307:
        print(link+" ==> REDIRECT")
    else:
        print("ERROR: link `"+link+"` failed with status "
              , result.status_code)
        if DO_FAIL:
            raise Exception("Failed verify")
def verify_static_file(link):
    """Request a static file and fail loudly if it is not served.

    link: the full URL of the static file.

    The file counts as present when the response status is 200 and the body
    does not contain the text "Error: 404 Not Found" anywhere, including at
    the very start of the body.

    Raises:
        Exception: when the status is not 200 or the body carries the
            not-found marker.

    A connection error is printed and otherwise ignored.
    """
    print("verifying "+link)
    try:
        result = requests.get(link, timeout=20, verify=False)
    except ConnectionError as ex:
        print("ERROR: ", link, ex)
        return
    not_found_marker = b"Error: 404 Not Found"
    # Membership test rather than find() <= 0: find() returns 0 when the
    # marker sits at the start of the body, which must count as "found".
    if result.status_code == 200 and not_found_marker not in result.content:
        print(link+" ==> OK")
    else:
        print("ERROR: link {}".format(link))
        raise Exception("Failed verify")
def check_page(host, start_url):
    """Verify every link found on the page at `start_url`.

    host: base URL that is prepended to links starting with "/".
    start_url: URL of the page to parse for `<a>` elements.

    Internal links (leading "/") are verified first as `host + link`, then
    all remaining links except in-page "#..." anchors are verified as-is.
    """
    print("")
    print("Checking links host "+host+" in page `"+start_url+"`")
    doc = parse(start_url).getroot()
    internal_links = []
    external_links = []
    # Single pass: in-page anchors fall through both branches and are dropped.
    for link in get_links(doc):
        if is_internal_link(link):
            internal_links.append(link)
        elif not is_in_page_link(link):
            external_links.append(link)
    for link in internal_links:
        verify_link(host+link)
    for link in external_links:
        verify_link(link)
def check_links(args_obj, parser):
    """Run the link check on the home page and on one trait page.

    args_obj: object whose `host` attribute is the base URL of the site.
    parser: accepted but not used by this function.
    """
    print("")
    print("Checking links")
    base_url = args_obj.host
    trait_path = "/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P"
    # Home page first, then the traits page.
    check_page(base_url, base_url)
    check_page(base_url, base_url+trait_path)
def check_packaged_js_files(args_obj, parser):
    """Verify that every packaged static asset is served by the host.

    args_obj: object whose `host` attribute is the base URL of the site.
    parser: accepted but not used by this function.

    Each path below (JavaScript, CSS and image files) is appended to the host
    and passed to verify_static_file. Every path is listed exactly once so no
    file is requested twice.
    """
    host = args_obj.host
    static_files = [
        # Datatables Extensions:
        "/css/DataTablesExtensions/buttonsBootstrap/css/buttons.bootstrap.css",
        "/js/DataTablesExtensions/buttons/js/dataTables.buttons.min.js",
        "/css/DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css",
        "/js/DataTablesExtensions/colResize/dataTables.colResize.js",
        "/js/DataTablesExtensions/colReorder/js/dataTables.colReorder.js",
        "/js/DataTablesExtensions/buttons/js/buttons.colVis.min.js",
        "/js/DataTablesExtensions/scroller/js/scroller.dataTables.min.js",
        "/js/DataTables/js/jquery.dataTables.js",
        "/css/DataTablesExtensions/scroller/css/scroller.dataTables.min.css",
        # Datatables plugins:
        "/js/DataTablesExtensions/plugins/sorting/natural.js",
        "/js/DataTablesExtensions/plugins/sorting/scientific.js",
        # Other js libraries
        "/js/chroma/chroma.min.js",
        "/js/d3-tip/d3-tip.js",
        "/js/d3js/d3.min.js",
        "/js/js_alt/underscore.min.js",
        "/js/nvd3/nv.d3.min.css",
        "/js/qtip2/jquery.qtip.min.js",
        "/js/js_alt/md5.min.js",
        "/js/underscore-string/underscore.string.min.js",
        "/js/js_alt/jstat.min.js",
        "/js/js_alt/parsley.min.js",
        "/js/js_alt/timeago.min.js",
        "/js/plotly/plotly.min.js",
        "/js/ckeditor/ckeditor.js",
        "/js/jszip/jszip.min.js",
        "/js/jscolor/jscolor.js",
        "/js/DataTables/js/jquery.js",
        "/css/DataTables/css/jquery.dataTables.css",
        "/js/colorbox/jquery.colorbox-min.js",
        "/css/nouislider/nouislider.min.css",
        "/js/nouislider/nouislider.js",
        "/js/purescript-genome-browser/js/purescript-genetics-browser.js",
        "/js/purescript-genome-browser/css/purescript-genetics-browser.css",
        "/js/cytoscape/cytoscape.min.js",
        "/js/cytoscape-panzoom/cytoscape-panzoom.js",
        "/js/cytoscape-panzoom/cytoscape.js-panzoom.css",
        "/js/cytoscape-qtip/cytoscape-qtip.js",
        "/css/d3-tip/d3-tip.css",
        "/js/javascript-twitter-post-fetcher/js/twitterFetcher_min.js",
        "/js/DataTables/images/sort_asc_disabled.png",
        "/js/DataTables/images/sort_desc_disabled.png",
    ]
    print("Checking links")
    for link in static_files:
        verify_static_file(host+link)
|