aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py184
1 files changed, 104 insertions, 80 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 7777735d..c4bcef8b 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -1,109 +1,131 @@
-import os, math, string, random, json
+import os
+import math
+import string
+import random
+import json
from base import webqtlConfig
from base.trait import create_trait
from base.data_set import create_dataset
-from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE
+from utility.tools import flat_files
+from utility.tools import GEMMA_WRAPPER_COMMAND
+from utility.tools import TEMPDIR
+from utility.tools import WEBSERVER_MODE
import utility.logger
-logger = utility.logger.getLogger(__name__ )
+logger = utility.logger.getLogger(__name__)
GEMMAOPTS = "-debug"
if WEBSERVER_MODE == 'PROD':
- GEMMAOPTS = "-no-check"
+ GEMMAOPTS = "-no-check"
-def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf=0.01, first_run=True, output_files=None):
- """Generates p-values for each marker using GEMMA"""
- if this_dataset.group.genofile != None:
+def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco,
+ maf=0.01, first_run=True, output_files=None):
+ """Generates p-values for each marker using GEMMA"""
+ if this_dataset.group.genofile is not None:
genofile_name = this_dataset.group.genofile[:-5]
else:
genofile_name = this_dataset.group.name
if first_run:
- trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
- gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
-
- if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)):
- open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+")
-
- k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
- gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
-
- this_chromosomes = this_dataset.species.chromosomes.chromosomes
- this_chromosomes_name=[this_chromosomes[chromosome].name for chromosome in this_chromosomes]
-
-
- chr_list_string=",".join(this_chromosomes_name)
- if covariates != "":
- gen_covariates_file(this_dataset, covariates, samples)
- if use_loco == "True":
- generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename,
- flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- k_output_filename)
-
- os.system(generate_k_command)
-
- gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename)
- if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'),
- this_dataset.group.name,
- flat_files('genotype/bimbam'),
- genofile_name,
- maf,
- TEMPDIR,
- gwa_output_filename)
- else:
- gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- maf,
- TEMPDIR,
- gwa_output_filename)
-
- else:
- generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- trait_filename,
- flat_files('genotype/bimbam'),
- genofile_name,
- TEMPDIR,
- k_output_filename)
-
- os.system(generate_k_command)
-
- gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
- genofile_name,
- flat_files('genotype/bimbam'),
+ trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + \
+ ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(6))
+ gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
+
+ if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)):
+ open("{}{}_output.assoc.txt".format(
+ webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+")
+
+ k_output_filename = this_dataset.group.name + "_K_" + \
+ ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(6))
+ gwa_output_filename = this_dataset.group.name + "_GWA_" + \
+ ''.join(random.choice(string.ascii_uppercase + string.digits)
+ for _ in range(6))
+
+ this_chromosomes = this_dataset.species.chromosomes.chromosomes
+ this_chromosomes_name = [
+ this_chromosomes[chromosome].name for chromosome in this_chromosomes]
+
+ chr_list_string = ",".join(this_chromosomes_name)
+ if covariates != "":
+ gen_covariates_file(this_dataset, covariates, samples)
+ if use_loco == "True":
+ generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
+ genofile_name,
+ TEMPDIR,
+ trait_filename,
+ flat_files(
+ 'genotype/bimbam'),
+ genofile_name,
+ TEMPDIR,
+ k_output_filename)
+
+ os.system(generate_k_command)
+
+ gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
+ genofile_name,
+ TEMPDIR,
+ trait_filename)
+ if covariates != "":
+ gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'),
+ this_dataset.group.name,
+ flat_files(
+ 'genotype/bimbam'),
+ genofile_name,
+ maf,
+ TEMPDIR,
+ gwa_output_filename)
+ else:
+ gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
genofile_name,
+ maf,
TEMPDIR,
- trait_filename)
-
-
- if covariates != "":
- gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % (flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename)
- else:
- gemma_command += ' > %s/gn2/%s.json' % (TEMPDIR, gwa_output_filename)
+ gwa_output_filename)
+
+ else:
+ generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
+ genofile_name,
+ TEMPDIR,
+ trait_filename,
+ flat_files(
+ 'genotype/bimbam'),
+ genofile_name,
+ TEMPDIR,
+ k_output_filename)
+
+ os.system(generate_k_command)
+
+ gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
+ genofile_name,
+ flat_files(
+ 'genotype/bimbam'),
+ genofile_name,
+ TEMPDIR,
+ trait_filename)
+
+ if covariates != "":
+ gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % (
+ flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename)
+ else:
+ gemma_command += ' > %s/gn2/%s.json' % (
+ TEMPDIR, gwa_output_filename)
- os.system(gemma_command)
+ os.system(gemma_command)
else:
- gwa_output_filename = output_files
+ gwa_output_filename = output_files
if use_loco == "True":
marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
return marker_obs, gwa_output_filename
else:
- marker_obs = parse_loco_output(this_dataset, gwa_output_filename, use_loco)
+ marker_obs = parse_loco_output(
+ this_dataset, gwa_output_filename, use_loco)
return marker_obs, gwa_output_filename
+
def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
"""Generates phenotype file for GEMMA"""
@@ -115,6 +137,7 @@ def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
else:
outfile.write(value + "\n")
+
def gen_covariates_file(this_dataset, covariates, samples):
covariate_list = covariates.split(",")
covariate_data_object = []
@@ -145,11 +168,12 @@ def gen_covariates_file(this_dataset, covariates, samples):
outfile.write(str(this_covariate[i]) + "\t")
outfile.write("\n")
+
def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
output_filelist = []
with open("{}/gn2/".format(TEMPDIR) + gwa_output_filename + ".json") as data_file:
- data = json.load(data_file)
+ data = json.load(data_file)
files = data['files']
for file in files: