aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2021-02-08 18:22:41 +0000
committerzsloan2021-02-08 18:22:41 +0000
commit971bad0eb26cdc27cc374290e9c5d5a921e44af4 (patch)
tree8d4168a3fac6210e1e41f0518ec54dc6c23d61fd
parent8c41a7404477ab7da86924e6d05089afc24c16a4 (diff)
parent8d7bc9bde9653d16daa67e230cf8f83fd9bf8c08 (diff)
downloadgenenetwork2-971bad0eb26cdc27cc374290e9c5d5a921e44af4.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into testing
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py208
1 files changed, 115 insertions, 93 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 7777735d..83ebcdf9 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -1,120 +1,145 @@
-import os, math, string, random, json
+import os
+import math
+import string
+import random
+import json
from base import webqtlConfig
from base.trait import create_trait
from base.data_set import create_dataset
-from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE
+from utility.tools import flat_files
+from utility.tools import GEMMA_WRAPPER_COMMAND
+from utility.tools import TEMPDIR
+from utility.tools import WEBSERVER_MODE
import utility.logger
-logger = utility.logger.getLogger(__name__ )
+logger = utility.logger.getLogger(__name__)
GEMMAOPTS = "-debug"
if WEBSERVER_MODE == 'PROD':
- GEMMAOPTS = "-no-check"
+ GEMMAOPTS = "-no-check"
+
+
+def generate_random_n_string(n):
+ return ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(n))
-def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf=0.01, first_run=True, output_files=None):
- """Generates p-values for each marker using GEMMA"""
- if this_dataset.group.genofile != None:
+def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco,
+ maf=0.01, first_run=True, output_files=None):
+ """Generates p-values for each marker using GEMMA"""
+ if this_dataset.group.genofile is not None:
genofile_name = this_dataset.group.genofile[:-5]
else:
genofile_name = this_dataset.group.name
if first_run:
- trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
- gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
-
- if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)):
- open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+")
-
- k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
- gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
-
- this_chromosomes = this_dataset.species.chromosomes.chromosomes
- this_chromosomes_name=[this_chromosomes[chromosome].name for chromosome in this_chromosomes]
-
-
- chr_list_string=",".join(this_chromosomes_name)
- if covariates != "":
- gen_covariates_file(this_dataset, covariates, samples)
- if use_loco == "True":
- generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename,
- flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- k_output_filename)
-
- os.system(generate_k_command)
-
- gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename)
- if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'),
- this_dataset.group.name,
- flat_files('genotype/bimbam'),
- genofile_name,
- maf,
- TEMPDIR,
- gwa_output_filename)
- else:
- gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- maf,
- TEMPDIR,
- gwa_output_filename)
-
- else:
- generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename,
- flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- k_output_filename)
-
- os.system(generate_k_command)
-
- gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
- genofile_name,
- flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename)
-
-
- if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % (flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename)
- else:
- gemma_command += ' > %s/gn2/%s.json' % (TEMPDIR, gwa_output_filename)
-
- os.system(gemma_command)
+ trait_filename = (f"{str(this_dataset.group.name)}_"
+ f"{str(this_trait.name)}_"
+ f"{generate_random_n_string(6)}")
+ gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
+
+ if not os.path.isfile(f"{webqtlConfig.GENERATED_IMAGE_DIR}"
+ f"{genofile_name}_output.assoc.txt"):
+ open((f"{webqtlConfig.GENERATED_IMAGE_DIR}"
+ f"{genofile_name}_output.assoc.txt"),
+ "w+")
+
+ k_output_filename = (f"{this_dataset.group.name}_K_"
+ f"{generate_random_n_string(6)}")
+ gwa_output_filename = (f"{this_dataset.group.name}_GWA_"
+ f"{generate_random_n_string(6)}")
+
+ this_chromosomes = this_dataset.species.chromosomes.chromosomes
+ this_chromosomes_name = [this_chromosomes[chromosome].name
+ for chromosome in this_chromosomes]
+
+ chr_list_string = ",".join(this_chromosomes_name)
+ if covariates != "":
+ gen_covariates_file(this_dataset, covariates, samples)
+ if use_loco == "True":
+ generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco "
+ f"{chr_list_string} -- {GEMMAOPTS} "
+ f"-g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt -p "
+ f"{TEMPDIR}/gn2/{trait_filename}.txt -a "
+ f"{flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt -gk > "
+ f"{TEMPDIR}/gn2/{k_output_filename}.json")
+ os.system(generate_k_command)
+
+ gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco "
+ f"--input {TEMPDIR}/gn2/{k_output_filename}.json "
+ f"-- {GEMMAOPTS} "
+ f"-g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt "
+ f"-p {TEMPDIR}/gn2/{trait_filename}.txt ")
+ if covariates != "":
+ gemma_command += (f"-c {flat_files('mapping')}/"
+ f"{this_dataset.group.name}_covariates.txt "
+ f"-a {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt "
+ f"-lmm 9 -maf {maf} > {TEMPDIR}/gn2/"
+ f"{gwa_output_filename}.json")
+ else:
+ gemma_command += (f"-a {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt -lmm 9 -maf "
+ f"{maf} > "
+ f"{TEMPDIR}/gn2/{gwa_output_filename}.json")
+
+ else:
+ generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json -- "
+ f"{GEMMAOPTS} "
+ f" -g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt -p "
+ f"{TEMPDIR}/gn2/{trait_filename}.txt -a "
+ f"{flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt -gk > "
+ f"{TEMPDIR}/gn2/{k_output_filename}.json")
+
+ os.system(generate_k_command)
+
+ gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --input "
+ f"{TEMPDIR}/gn2/{k_output_filename}.json -- "
+ f"{GEMMAOPTS} "
+ f"-a {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_snps.txt "
+ f"-lmm 9 -g {flat_files('genotype/bimbam')}/"
+ f"{genofile_name}_geno.txt -p "
+ f"{TEMPDIR}/gn2/{trait_filename}.txt ")
+
+ if covariates != "":
+ gemma_command += (f" -c {flat_files('mapping')}/"
+ f"{this_dataset.group.name}"
+ f"_covariates.txt > "
+ f"{TEMPDIR}/gn2/{gwa_output_filename}.json")
+ else:
+ gemma_command += f" > {TEMPDIR}/gn2/{gwa_output_filename}.json"
+
+ os.system(gemma_command)
else:
- gwa_output_filename = output_files
+ gwa_output_filename = output_files
if use_loco == "True":
marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
return marker_obs, gwa_output_filename
else:
- marker_obs = parse_loco_output(this_dataset, gwa_output_filename, use_loco)
+ marker_obs = parse_loco_output(
+ this_dataset, gwa_output_filename, use_loco)
return marker_obs, gwa_output_filename
+
def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
"""Generates phenotype file for GEMMA"""
- current_file_data = []
- with open("{}/gn2/{}.txt".format(TEMPDIR, trait_filename), "w") as outfile:
+ with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile:
for value in vals:
if value == "x":
outfile.write("NA\n")
else:
outfile.write(value + "\n")
+
def gen_covariates_file(this_dataset, covariates, samples):
covariate_list = covariates.split(",")
covariate_data_object = []
@@ -125,8 +150,6 @@ def gen_covariates_file(this_dataset, covariates, samples):
trait_ob = create_trait(dataset=dataset_ob,
name=trait_name,
cellid=None)
-
- #trait_samples = this_dataset.group.all_samples_ordered()
this_dataset.group.get_samplelist()
trait_samples = this_dataset.group.samplelist
trait_sample_data = trait_ob.data
@@ -139,17 +162,20 @@ def gen_covariates_file(this_dataset, covariates, samples):
this_covariate_data.append("-9")
covariate_data_object.append(this_covariate_data)
- with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile:
+ with open((f"{flat_files('mapping')}/"
+ f"{this_dataset.group.name}_covariates.txt"),
+ "w") as outfile:
for i in range(len(covariate_data_object[0])):
for this_covariate in covariate_data_object:
outfile.write(str(this_covariate[i]) + "\t")
outfile.write("\n")
+
def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
output_filelist = []
- with open("{}/gn2/".format(TEMPDIR) + gwa_output_filename + ".json") as data_file:
- data = json.load(data_file)
+ with open(f"{TEMPDIR}/gn2/{gwa_output_filename}.json") as data_file:
+ data = json.load(data_file)
files = data['files']
for file in files:
@@ -160,10 +186,8 @@ def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
marker_obs = []
previous_chr = 0
- no_results = False
for this_file in output_filelist:
if not os.path.isfile(this_file):
- no_results = True
break
with open(this_file) as output_file:
for line in output_file:
@@ -188,10 +212,8 @@ def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
marker['additive'] = float(line.split("\t")[7])
if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
marker['lod_score'] = 0
- #marker['lrs_value'] = 0
else:
marker['lod_score'] = -math.log10(marker['p_value'])
- #marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
marker_obs.append(marker)
included_markers.append(line.split("\t")[1])