aboutsummaryrefslogtreecommitdiff
path: root/gn2/wqflask/marker_regression/qtlreaper_mapping.py
diff options
context:
space:
mode:
authorArun Isaac2023-12-29 18:55:37 +0000
committerArun Isaac2023-12-29 19:01:46 +0000
commit204a308be0f741726b9a620d88fbc22b22124c81 (patch)
treeb3cf66906674020b530c844c2bb4982c8a0e2d39 /gn2/wqflask/marker_regression/qtlreaper_mapping.py
parent83062c75442160427b50420161bfcae2c5c34c84 (diff)
downloadgenenetwork2-204a308be0f741726b9a620d88fbc22b22124c81.tar.gz
Namespace all modules under gn2.
We move all modules under a gn2 directory. This is important for "correct" packaging and deployment as a Guix service.
Diffstat (limited to 'gn2/wqflask/marker_regression/qtlreaper_mapping.py')
-rw-r--r--gn2/wqflask/marker_regression/qtlreaper_mapping.py193
1 files changed, 193 insertions, 0 deletions
diff --git a/gn2/wqflask/marker_regression/qtlreaper_mapping.py b/gn2/wqflask/marker_regression/qtlreaper_mapping.py
new file mode 100644
index 00000000..2c9ca1b2
--- /dev/null
+++ b/gn2/wqflask/marker_regression/qtlreaper_mapping.py
@@ -0,0 +1,193 @@
+import os
+import math
+import string
+import random
+import json
+import re
+
+from gn2.base import webqtlConfig
+from gn2.base.trait import GeneralTrait
+from gn2.base.data_set import create_dataset
+from gn2.utility.tools import flat_files, REAPER_COMMAND, TEMPDIR
+
+
+def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None):
+ """Generates p-values for each marker using qtlreaper"""
+
+ if first_run:
+ if this_dataset.group.genofile != None:
+ genofile_name = this_dataset.group.genofile[:-5]
+ else:
+ genofile_name = this_dataset.group.name
+
+ trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno"
+ gen_pheno_txt_file(samples, vals, trait_filename)
+
+ output_filename = (f"{this_dataset.group.name}_GWA_"
+ + ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(6))
+ )
+ bootstrap_filename = None
+ permu_filename = None
+
+ opt_list = []
+ if boot_check and num_bootstrap > 0:
+ bootstrap_filename = (f"{this_dataset.group.name}_BOOTSTRAP_"
+ + ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(6))
+ )
+
+ opt_list.append("-b")
+ opt_list.append(f"--n_bootstrap {str(num_bootstrap)}")
+ opt_list.append(
+ f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt")
+ if num_perm > 0:
+ permu_filename = ("{this_dataset.group.name}_PERM_"
+ + ''.join(random.choice(string.ascii_uppercase
+ + string.digits) for _ in range(6))
+ )
+ opt_list.append("-n " + str(num_perm))
+ opt_list.append(
+ "--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt")
+ if control_marker != "" and do_control == "true":
+ opt_list.append("-c " + control_marker)
+ if manhattan_plot != True:
+ opt_list.append("--interval 1")
+
+ reaper_command = (REAPER_COMMAND +
+ ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(flat_files('genotype'),
+
+ genofile_name,
+ TEMPDIR,
+ trait_filename,
+ " ".join(
+ opt_list),
+ webqtlConfig.GENERATED_IMAGE_DIR,
+ output_filename))
+ os.system(reaper_command)
+ else:
+ output_filename, permu_filename, bootstrap_filename = output_files
+
+ marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(
+ output_filename, permu_filename, bootstrap_filename)
+
+ suggestive = 0
+ significant = 0
+ if len(permu_vals) > 0:
+ suggestive = permu_vals[int(num_perm * 0.37 - 1)]
+ significant = permu_vals[int(num_perm * 0.95 - 1)]
+
+ return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals,
+ [output_filename, permu_filename, bootstrap_filename])
+
+
+def gen_pheno_txt_file(samples, vals, trait_filename):
+ """Generates phenotype file for GEMMA"""
+
+ with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile:
+ outfile.write("Trait\t")
+
+ filtered_sample_list = []
+ filtered_vals_list = []
+ for i, sample in enumerate(samples):
+ if vals[i] != "x":
+ filtered_sample_list.append(sample)
+ filtered_vals_list.append(vals[i])
+
+ samples_string = "\t".join(filtered_sample_list)
+ outfile.write(samples_string + "\n")
+ outfile.write("T1\t")
+ values_string = "\t".join(filtered_vals_list)
+ outfile.write(values_string)
+
+
+def parse_reaper_output(gwa_filename, permu_filename, bootstrap_filename):
+ included_markers = []
+ p_values = []
+ marker_obs = []
+
+ only_cm = False
+ only_mb = False
+
+ with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{gwa_filename}.txt") as output_file:
+ for line in output_file:
+ if line.startswith("ID\t"):
+ if len(line.split("\t")) < 8:
+ if 'cM' in line.split("\t"):
+ only_cm = True
+ else:
+ only_mb = True
+ continue
+ else:
+ marker = {}
+ marker['name'] = line.split("\t")[1]
+ try:
+ marker['chr'] = int(line.split("\t")[2])
+ except:
+ marker['chr'] = line.split("\t")[2]
+ if only_cm or only_mb:
+ if only_cm:
+ marker['cM'] = float(line.split("\t")[3])
+ else:
+ if float(line.split("\t")[3]) > 1000:
+ marker['Mb'] = float(line.split("\t")[3]) / 1000000
+ else:
+ marker['Mb'] = float(line.split("\t")[3])
+ if float(line.split("\t")[6]) != 1:
+ marker['p_value'] = float(line.split("\t")[6])
+ marker['lrs_value'] = float(line.split("\t")[4])
+ marker['lod_score'] = marker['lrs_value'] / 4.61
+ marker['additive'] = float(line.split("\t")[5])
+ else:
+ marker['cM'] = float(line.split("\t")[3])
+ if float(line.split("\t")[4]) > 1000:
+ marker['Mb'] = float(line.split("\t")[4]) / 1000000
+ else:
+ marker['Mb'] = float(line.split("\t")[4])
+ if float(line.split("\t")[7]) != 1:
+ marker['p_value'] = float(line.split("\t")[7])
+ marker['lrs_value'] = float(line.split("\t")[5])
+ marker['lod_score'] = marker['lrs_value'] / 4.61
+ marker['additive'] = float(line.split("\t")[6])
+ marker_obs.append(marker)
+
+ # ZS: Results have to be reordered because the new reaper returns results sorted alphabetically by chr for some reason, resulting in chr 1 being followed by 10, etc
+ sorted_indices = natural_sort(marker_obs)
+
+ permu_vals = []
+ if permu_filename:
+ with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{permu_filename}.txt") as permu_file:
+ for line in permu_file:
+ permu_vals.append(float(line))
+
+ bootstrap_vals = []
+ if bootstrap_filename:
+ with open(f"{webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt") as bootstrap_file:
+ for line in bootstrap_file:
+ bootstrap_vals.append(int(line))
+
+ marker_obs = [marker_obs[i] for i in sorted_indices]
+ if len(bootstrap_vals) > 0:
+ bootstrap_vals = [bootstrap_vals[i] for i in sorted_indices]
+
+ return marker_obs, permu_vals, bootstrap_vals
+
+
+def natural_sort(marker_list):
+ """
+ Function to naturally sort numbers + strings, adopted from user Mark Byers here: https://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort
+ Changed to return indices instead of values, though, since the same reordering needs to be applied to bootstrap results
+ """
+
+ def convert(text):
+ if text.isdigit():
+ return int(text)
+ else:
+ if text != "M":
+ return text.lower()
+ else:
+ return "z"
+
+ alphanum_key = lambda key: [convert(c) for c in re.split(
+ '([0-9]+)', str(marker_list[key]['chr']))]
+ return sorted(list(range(len(marker_list))), key=alphanum_key)