diff options
author | Arun Isaac | 2023-12-29 18:55:37 +0000 |
---|---|---|
committer | Arun Isaac | 2023-12-29 19:01:46 +0000 |
commit | 204a308be0f741726b9a620d88fbc22b22124c81 (patch) | |
tree | b3cf66906674020b530c844c2bb4982c8a0e2d39 /gn2/wqflask/marker_regression/qtlreaper_mapping.py | |
parent | 83062c75442160427b50420161bfcae2c5c34c84 (diff) | |
download | genenetwork2-204a308be0f741726b9a620d88fbc22b22124c81.tar.gz |
Namespace all modules under gn2.
We move all modules under a gn2 directory. This is important for
"correct" packaging and deployment as a Guix service.
Diffstat (limited to 'gn2/wqflask/marker_regression/qtlreaper_mapping.py')
-rw-r--r-- | gn2/wqflask/marker_regression/qtlreaper_mapping.py | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/gn2/wqflask/marker_regression/qtlreaper_mapping.py b/gn2/wqflask/marker_regression/qtlreaper_mapping.py new file mode 100644 index 00000000..2c9ca1b2 --- /dev/null +++ b/gn2/wqflask/marker_regression/qtlreaper_mapping.py @@ -0,0 +1,193 @@ +import os +import math +import string +import random +import json +import re + +from gn2.base import webqtlConfig +from gn2.base.trait import GeneralTrait +from gn2.base.data_set import create_dataset +from gn2.utility.tools import flat_files, REAPER_COMMAND, TEMPDIR + + +def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None): + """Generates p-values for each marker using qtlreaper""" + + if first_run: + if this_dataset.group.genofile != None: + genofile_name = this_dataset.group.genofile[:-5] + else: + genofile_name = this_dataset.group.name + + trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno" + gen_pheno_txt_file(samples, vals, trait_filename) + + output_filename = (f"{this_dataset.group.name}_GWA_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) + bootstrap_filename = None + permu_filename = None + + opt_list = [] + if boot_check and num_bootstrap > 0: + bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_" + + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + ) + + opt_list.append("-b") + opt_list.append(f"--n_bootstrap {str(num_bootstrap)}") + opt_list.append( + f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") + if num_perm > 0: + permu_filename = ("{this_dataset.group.name}_PERM_" + + ''.join(random.choice(string.ascii_uppercase + + string.digits) for _ in range(6)) + ) + opt_list.append("-n " + str(num_perm)) + opt_list.append( + "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt") + if control_marker != "" and do_control == "true": + opt_list.append("-c " + control_marker) + if manhattan_plot != True: + opt_list.append("--interval 1") + + reaper_command = (REAPER_COMMAND + + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'), + + genofile_name, + TEMPDIR, + trait_filename, + " ".join( + opt_list), + webqtlConfig.GENERATED_IMAGE_DIR, + output_filename)) + os.system(reaper_command) + else: + output_filename, permu_filename, bootstrap_filename = output_files + + marker_obs, permu_vals, bootstrap_vals = parse_reaper_output( + output_filename, permu_filename, bootstrap_filename) + + suggestive = 0 + significant = 0 + if len(permu_vals) > 0: + suggestive = permu_vals[int(num_perm * 0.37 - 1)] + significant = permu_vals[int(num_perm * 0.95 - 1)] + + return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals, + [output_filename, permu_filename, bootstrap_filename]) + + +def gen_pheno_txt_file(samples, vals, trait_filename): + """Generates phenotype file for GEMMA""" + + with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile: + outfile.write("Trait\t") + + filtered_sample_list = [] + filtered_vals_list = [] + for i, sample in enumerate(samples): + if vals[i] != "x": + filtered_sample_list.append(sample) + filtered_vals_list.append(vals[i]) + + samples_string = "\t".join(filtered_sample_list) + outfile.write(samples_string + "\n") + outfile.write("T1\t") + values_string = "\t".join(filtered_vals_list) + outfile.write(values_string) + + +def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename): + included_markers = [] + p_values = [] + marker_obs = [] + + only_cm = False + only_mb = False + + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{gwa_filename}.txt") as output_file: + for line in output_file: + if line.startswith("ID\t"): + if len(line.split("\t")) < 8: + if 'cM' in line.split("\t"): + only_cm = True + else: + only_mb = True + continue + else: + marker = {} + marker['name'] = line.split("\t")[1] + try: + marker['chr'] = int(line.split("\t")[2]) + except: + marker['chr'] = line.split("\t")[2] + if only_cm or only_mb: + if only_cm: + marker['cM'] = float(line.split("\t")[3]) + else: + if float(line.split("\t")[3]) > 1000: + marker['Mb'] = float(line.split("\t")[3]) / 1000000 + else: + marker['Mb'] = float(line.split("\t")[3]) + if float(line.split("\t")[6]) != 1: + marker['p_value'] = float(line.split("\t")[6]) + marker['lrs_value'] = float(line.split("\t")[4]) + marker['lod_score'] = marker['lrs_value'] / 4.61 + marker['additive'] = float(line.split("\t")[5]) + else: + marker['cM'] = float(line.split("\t")[3]) + if float(line.split("\t")[4]) > 1000: + marker['Mb'] = float(line.split("\t")[4]) / 1000000 + else: + marker['Mb'] = float(line.split("\t")[4]) + if float(line.split("\t")[7]) != 1: + marker['p_value'] = float(line.split("\t")[7]) + marker['lrs_value'] = float(line.split("\t")[5]) + marker['lod_score'] = marker['lrs_value'] / 4.61 + marker['additive'] = float(line.split("\t")[6]) + marker_obs.append(marker) + + # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc + sorted_indices = natural_sort(marker_obs) + + permu_vals = [] + if permu_filename: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{permu_filename}.txt") as permu_file: + for line in permu_file: + permu_vals.append(float(line)) + + bootstrap_vals = [] + if bootstrap_filename: + with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") as bootstrap_file: + for line in bootstrap_file: + bootstrap_vals.append(int(line)) + + marker_obs = [marker_obs[i] for i in sorted_indices] + if len(bootstrap_vals) > 0: + bootstrap_vals = [bootstrap_vals[i] for i in sorted_indices] + + return marker_obs, permu_vals, bootstrap_vals + + +def natural_sort(marker_list): + """ + Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort + Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results + """ + + def convert(text): + if text.isdigit(): + return int(text) + else: + if text != "M": + return text.lower() + else: + return "z" + + alphanum_key = lambda key: [convert(c) for c in re.split( + '([0-9]+)', str(marker_list[key]['chr']))] + return sorted(list(range(len(marker_list))), key=alphanum_key) |