diff options
-rw-r--r-- | wqflask/wqflask/marker_regression/gemma_mapping.py | 184 |
1 files changed, 104 insertions, 80 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 7777735d..c4bcef8b 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -1,109 +1,131 @@ -import os, math, string, random, json +import os +import math +import string +import random +import json from base import webqtlConfig from base.trait import create_trait from base.data_set import create_dataset -from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE +from utility.tools import flat_files +from utility.tools import GEMMA_WRAPPER_COMMAND +from utility.tools import TEMPDIR +from utility.tools import WEBSERVER_MODE import utility.logger -logger = utility.logger.getLogger(__name__ ) +logger = utility.logger.getLogger(__name__) GEMMAOPTS = "-debug" if WEBSERVER_MODE == 'PROD': - GEMMAOPTS = "-no-check" + GEMMAOPTS = "-no-check" -def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf=0.01, first_run=True, output_files=None): - """Generates p-values for each marker using GEMMA""" - if this_dataset.group.genofile != None: +def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, + maf=0.01, first_run=True, output_files=None): + """Generates p-values for each marker using GEMMA""" + if this_dataset.group.genofile is not None: genofile_name = this_dataset.group.genofile[:-5] else: genofile_name = this_dataset.group.name if first_run: - trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename) - - if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)): - open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+") - - k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - - this_chromosomes = this_dataset.species.chromosomes.chromosomes - this_chromosomes_name=[this_chromosomes[chromosome].name for chromosome in this_chromosomes] - - - chr_list_string=",".join(this_chromosomes_name) - if covariates != "": - gen_covariates_file(this_dataset, covariates, samples) - if use_loco == "True": - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename, - flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - k_output_filename) - - os.system(generate_k_command) - - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename) - if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - genofile_name, - maf, - TEMPDIR, - gwa_output_filename) - else: - gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - maf, - TEMPDIR, - gwa_output_filename) - - else: - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - trait_filename, - flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - k_output_filename) - - os.system(generate_k_command) - - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), + trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename) + + if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)): + open("{}{}_output.assoc.txt".format( + webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+") + + k_output_filename = this_dataset.group.name + "_K_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + gwa_output_filename = this_dataset.group.name + "_GWA_" + \ + ''.join(random.choice(string.ascii_uppercase + string.digits) + for _ in range(6)) + + this_chromosomes = this_dataset.species.chromosomes.chromosomes + this_chromosomes_name = [ + this_chromosomes[chromosome].name for chromosome in this_chromosomes] + + chr_list_string = ",".join(this_chromosomes_name) + if covariates != "": + gen_covariates_file(this_dataset, covariates, samples) + if use_loco == "True": + generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + trait_filename, + flat_files( + 'genotype/bimbam'), + genofile_name, + TEMPDIR, + k_output_filename) + + os.system(generate_k_command) + + gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + trait_filename) + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'), + this_dataset.group.name, + flat_files( + 'genotype/bimbam'), + genofile_name, + maf, + TEMPDIR, + gwa_output_filename) + else: + gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), genofile_name, + maf, TEMPDIR, - trait_filename) - - - if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % (flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename) - else: - gemma_command += ' > %s/gn2/%s.json' % (TEMPDIR, gwa_output_filename) + gwa_output_filename) + + else: + generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + trait_filename, + flat_files( + 'genotype/bimbam'), + genofile_name, + TEMPDIR, + k_output_filename) + + os.system(generate_k_command) + + gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'), + genofile_name, + flat_files( + 'genotype/bimbam'), + genofile_name, + TEMPDIR, + trait_filename) + + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % ( + flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename) + else: + gemma_command += ' > %s/gn2/%s.json' % ( + TEMPDIR, gwa_output_filename) - os.system(gemma_command) + os.system(gemma_command) else: - gwa_output_filename = output_files + gwa_output_filename = output_files if use_loco == "True": marker_obs = parse_loco_output(this_dataset, gwa_output_filename) return marker_obs, gwa_output_filename else: - marker_obs = parse_loco_output(this_dataset, gwa_output_filename, use_loco) + marker_obs = parse_loco_output( + this_dataset, gwa_output_filename, use_loco) return marker_obs, gwa_output_filename + def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename): """Generates phenotype file for GEMMA""" @@ -115,6 +137,7 @@ def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename): else: outfile.write(value + "\n") + def gen_covariates_file(this_dataset, covariates, samples): covariate_list = covariates.split(",") covariate_data_object = [] @@ -145,11 +168,12 @@ def gen_covariates_file(this_dataset, covariates, samples): outfile.write(str(this_covariate[i]) + "\t") outfile.write("\n") + def parse_loco_output(this_dataset, gwa_output_filename, loco="True"): output_filelist = [] with open("{}/gn2/".format(TEMPDIR) + gwa_output_filename + ".json") as data_file: - data = json.load(data_file) + data = json.load(data_file) files = data['files'] for file in files: |