import csv
import sys
import html
import json
import requests
from lxml import etree
from pathlib import Path
from lxml.html import parse
from functools import reduce
from link_checker import check_page
def corrs_base_data():
    """Return the base form fields for each trait/target-dataset pair.

    Every entry uses the same source dataset; only the trait identifier and
    the dataset to correlate against differ. A fresh list of fresh dicts is
    built on every call.
    """
    source_dataset = "HC_M2_0606_P"
    # (trait_id, corr_dataset) pairs, in the order they are checked.
    trait_and_target = (
        ("1435464_at", "HC_M2_0606_P"),
        ("1457545_at", "HC_M2_0606_R"),
        ("1442370_at", "BXDPublish"),
    )
    return [
        {
            "dataset": source_dataset,
            "trait_id": trait_id,
            "corr_dataset": target_dataset,
        }
        for trait_id, target_dataset in trait_and_target
    ]
def sample_vals():
    """Return the JSON text used as the default ``sample_vals`` form field.

    The value is a JSON object, serialized without whitespace, that maps a
    sample name to either a numeric string or the placeholder ``"x"``
    (presumably "no value for this sample" -- confirm against the server).

    The payload is written as adjacent string literals so that no literal
    spans a physical line; the concatenated text contains no newline.
    """
    return (
        '{"C57BL/6J":"10.835","DBA/2J":"11.142","B6D2F1":"11.126","D2B6F1":"11.143",'
        '"BXD1":"10.811","BXD2":"11.503","BXD5":"10.766","BXD6":"10.986","BXD8":"11.050",'
        '"BXD9":"10.822","BXD11":"10.670","BXD12":"10.946","BXD13":"10.890","BXD14":"x",'
        '"BXD15":"10.884","BXD16":"11.222","BXD18":"x","BXD19":"10.968","BXD20":"10.962",'
        '"BXD21":"10.906","BXD22":"11.080","BXD23":"11.046","BXD24":"11.146","BXD24a":"x",'
        '"BXD25":"x","BXD27":"11.078","BXD28":"11.034","BXD29":"10.808","BXD30":"x",'
        '"BXD31":"11.087","BXD32":"11.029","BXD33":"10.662","BXD34":"11.482","BXD35":"x",'
        '"BXD36":"x","BXD37":"x","BXD38":"10.836","BXD39":"10.926","BXD40":"10.638",'
        '"BXD41":"x","BXD42":"10.974","BXD43":"10.828","BXD44":"10.900","BXD45":"11.358",'
        '"BXD48":"11.042","BXD48a":"10.975","BXD49":"x","BXD50":"11.228","BXD51":"11.126",'
        '"BXD52":"x","BXD53":"x","BXD54":"x","BXD55":"11.580","BXD56":"x","BXD59":"x",'
        '"BXD60":"10.829","BXD61":"11.152","BXD62":"11.156","BXD63":"10.942","BXD64":"10.506",'
        '"BXD65":"11.126","BXD65a":"11.272","BXD65b":"11.157","BXD66":"11.071","BXD67":"11.080",'
        '"BXD68":"10.997","BXD69":"11.096","BXD70":"11.152","BXD71":"x","BXD72":"x",'
        '"BXD73":"11.262","BXD73a":"11.444","BXD73b":"x","BXD74":"10.974","BXD75":"11.150",'
        '"BXD76":"10.920","BXD77":"10.928","BXD78":"x","BXD79":"11.371","BXD81":"x",'
        '"BXD83":"10.946","BXD84":"11.181","BXD85":"10.992","BXD86":"10.770","BXD87":"11.200",'
        '"BXD88":"x","BXD89":"10.930","BXD90":"11.183","BXD91":"x","BXD93":"11.056",'
        '"BXD94":"10.737","BXD95":"x","BXD98":"10.986","BXD99":"10.892",'
        '"BXD100":"x","BXD101":"x","BXD102":"x","BXD104":"x","BXD105":"x","BXD106":"x",'
        '"BXD107":"x","BXD108":"x","BXD109":"x","BXD110":"x","BXD111":"x","BXD112":"x",'
        '"BXD113":"x","BXD114":"x","BXD115":"x","BXD116":"x","BXD117":"x","BXD119":"x",'
        '"BXD120":"x","BXD121":"x","BXD122":"x","BXD123":"x","BXD124":"x","BXD125":"x",'
        '"BXD126":"x","BXD127":"x","BXD128":"x","BXD128a":"x","BXD130":"x","BXD131":"x",'
        '"BXD132":"x","BXD133":"x","BXD134":"x","BXD135":"x","BXD136":"x","BXD137":"x",'
        '"BXD138":"x","BXD139":"x","BXD141":"x","BXD142":"x","BXD144":"x","BXD145":"x",'
        '"BXD146":"x","BXD147":"x","BXD148":"x","BXD149":"x","BXD150":"x","BXD151":"x",'
        '"BXD152":"x","BXD153":"x","BXD154":"x","BXD155":"x","BXD156":"x","BXD157":"x",'
        '"BXD160":"x","BXD161":"x","BXD162":"x","BXD165":"x","BXD168":"x","BXD169":"x",'
        '"BXD170":"x","BXD171":"x","BXD172":"x","BXD173":"x","BXD174":"x","BXD175":"x",'
        '"BXD176":"x","BXD177":"x","BXD178":"x","BXD180":"x","BXD181":"x","BXD183":"x",'
        '"BXD184":"x","BXD186":"x","BXD187":"x","BXD188":"x","BXD189":"x","BXD190":"x",'
        '"BXD191":"x","BXD192":"x","BXD193":"x","BXD194":"x","BXD195":"x","BXD196":"x",'
        '"BXD197":"x","BXD198":"x","BXD199":"x","BXD200":"x","BXD201":"x","BXD202":"x",'
        '"BXD203":"x","BXD204":"x","BXD205":"x","BXD206":"x","BXD207":"x","BXD208":"x",'
        '"BXD209":"x","BXD210":"x","BXD211":"x","BXD212":"x","BXD213":"x","BXD214":"x",'
        '"BXD215":"x","BXD216":"x","BXD217":"x","BXD218":"x","BXD219":"x","BXD220":"x"}'
    )
def do_request(url, data):
    """POST a correlation request and wait for the computed result page.

    Args:
        url: Endpoint to post the form to.
        data: Form fields that are layered over (and may override) the
            defaults defined below.

    Returns:
        The first ``requests`` response whose body no longer contains the
        "still computing" marker.
    """
    # The previous code polled with ``response.text.find('') >= 0``. Searching
    # for an empty string always succeeds, so that loop could never terminate.
    # NOTE(review): the marker below assumes the server's waiting page
    # auto-reloads through a ``<meta http-equiv="refresh" ...>`` tag and that
    # the final result page does not contain one -- confirm against the
    # server's template.
    refresh_marker = '<meta http-equiv="refresh"'
    response = requests.post(
        url,
        data={
            "dataset": "HC_M2_0606_P",
            "trait_id": "1435464_at",
            "corr_dataset": "HC_M2_0606_P",
            "corr_sample_method": "pearson",
            "corr_return_results": "100",
            "corr_samples_group": "samples_primary",
            "sample_vals": sample_vals(),
            "location_type": "gene",
            **data,
        })
    # Re-fetch the URL the POST ended up at until the waiting page is gone.
    while refresh_marker in response.text:
        response = requests.get(response.url)
    return response
def check_sample_correlations(baseurl, base_data):
    """Assert that a sample (Pearson) correlation request renders correctly.

    Posts ``base_data`` plus the sample-correlation settings to
    ``{baseurl}/corr_compute`` and raises ``AssertionError`` unless the
    response has status 200, mentions the requested trait, and shows the
    "top 200" banner. The page text is used as the assertion message for the
    two content checks.
    """
    payload = dict(base_data)
    payload.update({
        "corr_type": "sample",
        "corr_sample_method": "pearson",
        "location_type": "gene",
        "corr_return_results": "200",
    })
    expected_banner = (
        "The top 200 correlations ranked by the Genetic Correlation")
    response = do_request(f"{baseurl}/corr_compute", payload)
    assert response.status_code == 200
    page = response.text
    record_banner = f"Values of record {base_data['trait_id']}"
    assert record_banner in page, page
    assert expected_banner in page, page
def check_tissue_correlations(baseurl, base_data):
    """Assert that a tissue correlation request renders the expected page.

    For trait ``1442370_at`` against ``BXDPublish`` the page is expected to
    explain that the correlation cannot be computed; for every other
    combination it is expected to show the "top 100" banner. Raises
    ``AssertionError`` when the status is not 200 or expected text is absent.
    """
    payload = {**base_data, "corr_type": "tissue", "location_type": "gene"}
    response = do_request(f"{baseurl}/corr_compute", payload)
    assert response.status_code == 200

    not_computable = (
        payload["trait_id"] == "1442370_at"
        and payload["corr_dataset"] == "BXDPublish")
    expected_banner = (
        ("It is not possible to compute the 'Tissue' correlations between "
         f"trait '{payload['trait_id']}' and the data")
        if not_computable
        else "The top 100 correlations ranked by the Tissue Correlation")

    page = response.text
    assert f"Values of record {base_data['trait_id']}" in page, page
    # The banner is compared against the entity-decoded page text.
    assert expected_banner in html.unescape(page), (
        f"NOT FOUND: {expected_banner}")
def check_lit_correlations(baseurl, base_data):
    """Assert that a literature correlation request renders the expected page.

    For trait ``1442370_at`` against ``BXDPublish`` the page is expected to
    explain that the correlation cannot be computed; for every other
    combination it is expected to show the "top 200" banner. Raises
    ``AssertionError`` when the status is not 200 or expected text is absent.
    """
    payload = {**base_data, "corr_type": "lit", "corr_return_results": "200"}
    response = do_request(f"{baseurl}/corr_compute", payload)
    assert response.status_code == 200

    not_computable = (
        payload["trait_id"] == "1442370_at"
        and payload["corr_dataset"] == "BXDPublish")
    if not_computable:
        expected_banner = (
            "It is not possible to compute the 'Literature' correlations "
            f"between trait '{payload['trait_id']}' and the data")
    else:
        expected_banner = (
            "The top 200 correlations ranked by the Literature Correlation")

    page = response.text
    assert f"Values of record {base_data['trait_id']}" in page, page
    # The banner is compared against the entity-decoded page text.
    assert expected_banner in html.unescape(page), (
        f"NOT FOUND: {expected_banner}")
def check_correlations(args_obj, parser):
    """Run every correlation check against every base data set.

    Prints one progress line per (correlation type, base data) pair and a
    final ``OK``. If any check raised ``AssertionError``, prints ``FAIL!``
    and exits the process with status 1.

    Args:
        args_obj: Object whose ``host`` attribute is the base URL to test.
        parser: Not used by this function.
    """
    print("")
    print("Checking the correlations...")
    corr_type_fns = {
        "sample": check_sample_correlations,
        "tissue": check_tissue_correlations,
        "lit": check_lit_correlations
    }
    host = args_obj.host
    failure = False
    for corr_type, corr_type_fn in corr_type_fns.items():
        for corr_base in corrs_base_data():
            print(f"\tChecking {corr_type} correlations...", end="")
            try:
                corr_type_fn(host, corr_base)
            except AssertionError as asserterr:
                # A bare ``assert cond`` carries no message, so ``args`` is
                # empty; indexing it blindly would raise IndexError and abort
                # the whole run instead of recording the failure.
                reason = (
                    asserterr.args[0] if asserterr.args
                    else "assertion failed without a message")
                print(f" fail: {reason}")
                failure = True
            else:
                print(" ok")
    if failure:
        print("FAIL!")
        sys.exit(1)
    print("OK")
def thread(value, *functions):
    """Pipe ``value`` through ``functions`` from left to right.

    Each function receives the previous function's result; with no functions
    the value is returned unchanged.
    """
    piped = value
    for step in functions:
        piped = step(piped)
    return piped
def parse_results_from_html(raw_html):
    """Extract the ``tableJson`` rows embedded in a results page.

    Scans the page's ``<script>`` elements in document order. The first one
    whose text contains ``var tableJson`` is decoded as JSON and returned as
    a dict keyed by ``str(row["trait_id"])``. Returns an empty dict when no
    script matches.

    The leading ``len("var tableJson = ")`` characters are dropped before
    decoding, so the script text is assumed to begin with that assignment and
    to contain nothing but the JSON value after it.
    """
    prefix_length = len("var tableJson = ")
    for script_node in etree.HTML(raw_html).xpath('//script'):
        script_text = "".join(script_node.xpath('.//child::text()')).strip()
        if "var tableJson" not in script_text:
            continue
        # Rewrite escaped CRLF sequences to escaped LF before decoding.
        json_text = script_text[prefix_length:].strip().replace(
            "\\r\\n", "\\n")
        rows = json.loads(json_text)
        return {str(row["trait_id"]): row for row in rows}
    return {}
def parse_expected(filepath):
    """Lazily yield each row of the CSV file at ``filepath`` as a dict.

    The file is read as UTF-8 with the ``csv.unix_dialect`` dialect; the
    first row supplies the keys. The file stays open until the generator is
    exhausted or closed.
    """
    with open(filepath, encoding="utf-8") as expected_file:
        yield from csv.DictReader(expected_file, dialect=csv.unix_dialect)
def collect_failures(actual, expected, keys):
    """Describe every mismatch between expected rows and actual results.

    Args:
        actual: Mapping of trait id (as a string) to the actual result row.
        expected: Iterable of expected rows; each row's ``"Record"`` value
            identifies the trait to look up in ``actual``.
        keys: Iterable of ``(actual_key, expected_key, title)`` triples
            naming the fields to compare and the title used in messages.

    Returns:
        A tuple with one non-empty tuple of messages per expected row that
        is missing from ``actual`` or differs in at least one compared
        field. Values are compared as stripped strings. Rows present only in
        ``actual`` are ignored.
    """
    def _row_mismatches(trait_id, found_row, wanted_row):
        if found_row is None:
            return (f"Could not find trait '{trait_id}' in actual results",)
        messages = []
        for found_key, wanted_key, label in keys:
            found = str(found_row[found_key]).strip()
            wanted = str(wanted_row[wanted_key]).strip()
            if found != wanted:
                messages.append(
                    f"Trait '{trait_id}': "
                    f"Different '{label}' values: expected:\n\t\t'{wanted}'"
                    "\n\nbut got\n"
                    f"\n\t\t'{found}'")
        return tuple(messages)

    collected = []
    for wanted_row in expected:
        trait_id = str(wanted_row["Record"])
        mismatches = _row_mismatches(
            trait_id, actual.get(trait_id), wanted_row)
        if mismatches:
            collected.append(mismatches)
    return tuple(collected)
def check_correctness(host):
    """Compare computed correlation results with stored expected results.

    For each test, posts the test's form data to ``{host}/corr_compute``,
    waits for the computed page, extracts the result table from it and
    compares it field by field with the rows of the expected CSV file found
    under ``test/requests/correlation_results_text_files/`` relative to the
    parent of the current working directory.

    Args:
        host: Base URL of the service under test.

    Returns:
        ``True`` when every test matched; ``False`` (after printing every
        mismatch) otherwise.
    """
    # The previous code polled with ``response.text.find('') >= 0``. Searching
    # for an empty string always succeeds, so that loop could never terminate.
    # NOTE(review): the marker below assumes the server's waiting page
    # auto-reloads through a ``<meta http-equiv="refresh" ...>`` tag and that
    # the final result page does not contain one -- confirm against the
    # server's template.
    refresh_marker = '<meta http-equiv="refresh"'

    # Each triple: (key in the actual row, column in the expected CSV, title
    # used in failure messages).
    pearsons_keys = (
        ("trait_id", "Record", "Trait/Record ID"),
        ("sample_r", "Sample r", "Sample r value"),
        ("num_overlap", "N", "N Cases"),
        ("sample_p", "Sample p(r)", "Sample p value"),
        ("description", "Description", "Description"))
    # Column mapping for Spearman result files; no test below uses it yet.
    spearmans_keys = (
        ("trait_id", "Record ID", "Trait/Record ID"),
        ("sample_r", "Sample rho ?", "Sample rho value"),
        ("num_overlap", "N Cases", "N Cases"),
        ("sample_p", "Sample p(rho) ?", "Sample p(rho) value"),
        ("symbol", "Symbol", "Symbol"),
        ("description", "Description", "Description"),
        ("location", "Location Chr and Mb", "Location Chr and Mb"),
        ("mean", "Mean Expr", "Mean"),
        ("lrs_location", "Max LRS Location Chr and Mb", "Max LRS Location Chr and Mb"),
        ("lit_corr", "Lit Corr ?", "Literature Correlation"),
        ("tissue_corr", "Tissue rho ?", "Tissue Correlation rho"),
        ("tissue_pvalue", "Tissue p(rho) ?", "Tissue Correlation p(rho) value"))

    # Written as adjacent literals so that no string literal spans a physical
    # line; the concatenated JSON text contains no newline.
    trait_10710_sample_vals = (
        '{"C57BL/6J":"23.000","DBA/2J":"21.390","B6D2F1":"x","D2B6F1":"x",'
        '"BXD1":"25.505","BXD2":"20.197","BXD5":"27.270","BXD6":"18.768","BXD8":"21.440",'
        '"BXD9":"23.974","BXD11":"24.309","BXD12":"20.669","BXD13":"18.857","BXD14":"21.035",'
        '"BXD15":"21.350","BXD16":"20.869","BXD18":"20.812","BXD19":"22.859","BXD20":"19.768",'
        '"BXD21":"23.424","BXD22":"25.430","BXD23":"18.924","BXD24":"22.433","BXD24a":"x",'
        '"BXD25":"19.590","BXD27":"19.938","BXD28":"20.123","BXD29":"18.741","BXD30":"19.160",'
        '"BXD31":"20.330","BXD32":"25.748","BXD33":"23.531","BXD34":"22.670","BXD35":"20.276",'
        '"BXD36":"21.417","BXD37":"x","BXD38":"19.805","BXD39":"21.827","BXD40":"23.241",'
        '"BXD41":"x","BXD42":"24.039","BXD43":"21.778","BXD44":"26.300","BXD45":"22.730",'
        '"BXD48":"x","BXD48a":"x","BXD49":"x","BXD50":"x","BXD51":"24.827",'
        '"BXD52":"x","BXD53":"x","BXD54":"x","BXD55":"x","BXD56":"x","BXD59":"x",'
        '"BXD60":"24.055","BXD61":"x","BXD62":"25.336","BXD63":"22.865","BXD64":"x",'
        '"BXD65":"x","BXD65a":"21.949","BXD65b":"21.836","BXD66":"x","BXD67":"x",'
        '"BXD68":"x","BXD69":"22.643","BXD70":"x","BXD71":"x","BXD72":"x",'
        '"BXD73":"23.606","BXD73a":"x","BXD73b":"x","BXD74":"x","BXD75":"22.097",'
        '"BXD76":"x","BXD77":"24.020","BXD78":"x","BXD79":"x","BXD81":"x",'
        '"BXD83":"23.811","BXD84":"x","BXD85":"22.137","BXD86":"26.518","BXD87":"21.136",'
        '"BXD88":"x","BXD89":"20.182","BXD90":"22.480","BXD91":"x","BXD93":"x",'
        '"BXD94":"x","BXD95":"x","BXD98":"x","BXD99":"x",'
        '"BXD100":"x","BXD101":"x","BXD102":"x","BXD104":"x","BXD105":"x","BXD106":"x",'
        '"BXD107":"x","BXD108":"x","BXD109":"x","BXD110":"x","BXD111":"x","BXD112":"x",'
        '"BXD113":"x","BXD114":"x","BXD115":"x","BXD116":"x","BXD117":"x","BXD119":"x",'
        '"BXD120":"x","BXD121":"x","BXD122":"x","BXD123":"x","BXD124":"x","BXD125":"x",'
        '"BXD126":"x","BXD127":"x","BXD128":"x","BXD128a":"x","BXD130":"x","BXD131":"x",'
        '"BXD132":"x","BXD133":"x","BXD134":"x","BXD135":"x","BXD136":"x","BXD137":"x",'
        '"BXD138":"x","BXD139":"x","BXD141":"x","BXD142":"x","BXD144":"x","BXD145":"x",'
        '"BXD146":"x","BXD147":"x","BXD148":"x","BXD149":"x","BXD150":"x","BXD151":"x",'
        '"BXD152":"x","BXD153":"x","BXD154":"x","BXD155":"x","BXD156":"x","BXD157":"x",'
        '"BXD160":"x","BXD161":"x","BXD162":"x","BXD165":"x","BXD168":"x","BXD169":"x",'
        '"BXD170":"x","BXD171":"x","BXD172":"x","BXD173":"x","BXD174":"x","BXD175":"x",'
        '"BXD176":"x","BXD177":"x","BXD178":"x","BXD180":"x","BXD181":"x","BXD183":"x",'
        '"BXD184":"x","BXD186":"x","BXD187":"x","BXD188":"x","BXD189":"x","BXD190":"x",'
        '"BXD191":"x","BXD192":"x","BXD193":"x","BXD194":"x","BXD195":"x","BXD196":"x",'
        '"BXD197":"x","BXD198":"x","BXD199":"x","BXD200":"x","BXD201":"x","BXD202":"x",'
        '"BXD203":"x","BXD204":"x","BXD205":"x","BXD206":"x","BXD207":"x","BXD208":"x",'
        '"BXD209":"x","BXD210":"x","BXD211":"x","BXD212":"x","BXD213":"x","BXD214":"x",'
        '"BXD215":"x","BXD216":"x","BXD217":"x","BXD218":"x","BXD219":"x","BXD220":"x"}'
    )

    # Each test: (title, form data, expected CSV file name, key mapping).
    tests = [
        ("Trait '10710' (Dataset 'BXDPublish'): Sample Correlation, Pearson, 500 results",
         {"dataset": "BXDPublish", "trait_id": "10710",
          "corr_dataset": "BXDPublish", "corr_type": "sample",
          "corr_sample_method": "pearson", "location_type": "highest_lod",
          "corr_samples_group": "samples_primary",
          "sample_vals": trait_10710_sample_vals,
          "corr_return_results": "500"},
         "BXD_10710_vs_BXDPublish.csv",
         pearsons_keys),
    ]

    failures = {}
    for test_title, test_data, expected_file, method_keys in tests:
        print(f"Test: {test_title} ...", end="\t")
        response = requests.post(f"{host}/corr_compute", data=test_data)
        # Re-fetch the URL the POST ended up at until the waiting page is gone.
        while refresh_marker in response.text:
            response = requests.get(response.url)
        results = parse_results_from_html(response.text)
        if not results:
            failures[test_title] = (("No results found.",),)
            continue
        filepath = Path.cwd().parent.joinpath(
            f"test/requests/correlation_results_text_files/{expected_file}")
        test_failures = collect_failures(
            results, tuple(parse_expected(filepath)), method_keys)
        # Only tests with at least one mismatch are recorded.
        if test_failures:
            failures[test_title] = test_failures

    if not failures:
        return True

    print("\n\nFAILURES: ")
    # Distinct loop names: the old loop rebound ``failures`` while iterating it.
    for test_title, test_failures in failures.items():
        print(f"\nTest: {test_title}")
        for result_index, result_failures in enumerate(test_failures):
            for failure in result_failures:
                print(f"\tResult {result_index}: {failure}")
            if result_failures:
                print("")
                print("")
    return False
def check_correlations_correctness(args_obj, parser):
    """Run the correctness checks and exit with status 1 if any fail.

    ``args_obj.host`` is the base URL passed to ``check_correctness``;
    ``parser`` is not used by this function.
    """
    print("")
    print("Checking the correctness of the correlations...")
    all_matched = check_correctness(args_obj.host)
    if all_matched:
        return
    sys.exit(1)