about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py208
1 files changed, 115 insertions, 93 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 7777735d..83ebcdf9 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -1,120 +1,145 @@
-import os, math, string, random, json
+import os
+import math
+import string
+import random
+import json
 
 from base import webqtlConfig
 from base.trait import create_trait
 from base.data_set import create_dataset
-from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE
+from utility.tools import flat_files
+from utility.tools import GEMMA_WRAPPER_COMMAND
+from utility.tools import TEMPDIR
+from utility.tools import WEBSERVER_MODE
 
 import utility.logger
-logger = utility.logger.getLogger(__name__ )
+logger = utility.logger.getLogger(__name__)
 
 GEMMAOPTS = "-debug"
 if WEBSERVER_MODE == 'PROD':
-  GEMMAOPTS = "-no-check"
+    GEMMAOPTS = "-no-check"
+
+
+def generate_random_n_string(n):
+    return ''.join(random.choice(string.ascii_uppercase + string.digits)
+                   for _ in range(n))
 
-def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf=0.01, first_run=True, output_files=None):
-    """Generates p-values for each marker using GEMMA"""
 
-    if this_dataset.group.genofile != None:
+def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco,
+              maf=0.01, first_run=True, output_files=None):
+    """Generates p-values for each marker using GEMMA"""
+    if this_dataset.group.genofile is not None:
         genofile_name = this_dataset.group.genofile[:-5]
     else:
         genofile_name = this_dataset.group.name
 
     if first_run:
-      trait_filename = str(this_dataset.group.name) + "_" + str(this_trait.name) + "_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
-      gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
-
-      if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)):
-          open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+")
-
-      k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
-      gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
-
-      this_chromosomes = this_dataset.species.chromosomes.chromosomes
-      this_chromosomes_name=[this_chromosomes[chromosome].name for chromosome in this_chromosomes]
-
-
-      chr_list_string=",".join(this_chromosomes_name)
-      if covariates != "":
-          gen_covariates_file(this_dataset, covariates, samples)
-      if use_loco == "True":
-          generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
-                                                                                          genofile_name,
-                                                                                          TEMPDIR,
-                                                                                          trait_filename,
-                                                                                          flat_files('genotype/bimbam'),
-                                                                                          genofile_name,
-                                                                                          TEMPDIR,
-                                                                                          k_output_filename)
-
-          os.system(generate_k_command)
-
-          gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
-                                                                                          genofile_name,
-                                                                                          TEMPDIR,
-                                                                                          trait_filename)
-          if covariates != "":
-              gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'),
-                                                                                                                this_dataset.group.name,
-                                                                                                                flat_files('genotype/bimbam'),
-                                                                                                                genofile_name,
-                                                                                                                maf,
-                                                                                                                TEMPDIR,
-                                                                                                                gwa_output_filename)
-          else:
-              gemma_command += ' -a %s/%s_snps.txt -lmm 9 -maf %s > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
-                                                                                                               genofile_name,
-                                                                                                               maf,
-                                                                                                               TEMPDIR,
-                                                                                                               gwa_output_filename)
-
-      else:
-          generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
-                                                                                         genofile_name,
-                                                                                         TEMPDIR,
-                                                                                         trait_filename,
-                                                                                         flat_files('genotype/bimbam'),
-                                                                                         genofile_name,
-                                                                                         TEMPDIR,
-                                                                                         k_output_filename)
-
-          os.system(generate_k_command)
-
-          gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -a %s/%s_snps.txt -lmm 9 -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
-                                                                                         genofile_name,
-                                                                                         flat_files('genotype/bimbam'),
-                                                                                         genofile_name,
-                                                                                         TEMPDIR,
-                                                                                         trait_filename)
-
-
-          if covariates != "":
-              gemma_command += ' -c %s/%s_covariates.txt > %s/gn2/%s.json' % (flat_files('mapping'), this_dataset.group.name, TEMPDIR, gwa_output_filename)
-          else:
-              gemma_command += ' > %s/gn2/%s.json' % (TEMPDIR, gwa_output_filename)
-
-      os.system(gemma_command)
+        trait_filename = (f"{str(this_dataset.group.name)}_"
+                          f"{str(this_trait.name)}_"
+                          f"{generate_random_n_string(6)}")
+        gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
+
+        if not os.path.isfile(f"{webqtlConfig.GENERATED_IMAGE_DIR}"
+                              f"{genofile_name}_output.assoc.txt"):
+            open((f"{webqtlConfig.GENERATED_IMAGE_DIR}"
+                  f"{genofile_name}_output.assoc.txt"),
+                 "w+")
+
+        k_output_filename = (f"{this_dataset.group.name}_K_"
+                             f"{generate_random_n_string(6)}")
+        gwa_output_filename = (f"{this_dataset.group.name}_GWA_"
+                               f"{generate_random_n_string(6)}")
+
+        this_chromosomes = this_dataset.species.chromosomes.chromosomes
+        this_chromosomes_name = [this_chromosomes[chromosome].name
+                                 for chromosome in this_chromosomes]
+
+        chr_list_string = ",".join(this_chromosomes_name)
+        if covariates != "":
+            gen_covariates_file(this_dataset, covariates, samples)
+        if use_loco == "True":
+            generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco "
+                                  f"{chr_list_string} -- {GEMMAOPTS} "
+                                  f"-g {flat_files('genotype/bimbam')}/"
+                                  f"{genofile_name}_geno.txt -p "
+                                  f"{TEMPDIR}/gn2/{trait_filename}.txt -a "
+                                  f"{flat_files('genotype/bimbam')}/"
+                                  f"{genofile_name}_snps.txt -gk > "
+                                  f"{TEMPDIR}/gn2/{k_output_filename}.json")
+            os.system(generate_k_command)
+
+            gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --loco "
+                             f"--input {TEMPDIR}/gn2/{k_output_filename}.json "
+                             f"-- {GEMMAOPTS} "
+                             f"-g {flat_files('genotype/bimbam')}/"
+                             f"{genofile_name}_geno.txt "
+                             f"-p {TEMPDIR}/gn2/{trait_filename}.txt ")
+            if covariates != "":
+                gemma_command += (f"-c {flat_files('mapping')}/"
+                                  f"{this_dataset.group.name}_covariates.txt "
+                                  f"-a {flat_files('genotype/bimbam')}/"
+                                  f"{genofile_name}_snps.txt "
+                                  f"-lmm 9 -maf {maf} > {TEMPDIR}/gn2/"
+                                  f"{gwa_output_filename}.json")
+            else:
+                gemma_command += (f"-a {flat_files('genotype/bimbam')}/"
+                                  f"{genofile_name}_snps.txt -lmm 9 -maf "
+                                  f"{maf} > "
+                                  f"{TEMPDIR}/gn2/{gwa_output_filename}.json")
+
+        else:
+            generate_k_command = (f"{GEMMA_WRAPPER_COMMAND} --json -- "
+                                  f"{GEMMAOPTS} "
+                                  f" -g {flat_files('genotype/bimbam')}/"
+                                  f"{genofile_name}_geno.txt -p "
+                                  f"{TEMPDIR}/gn2/{trait_filename}.txt -a "
+                                  f"{flat_files('genotype/bimbam')}/"
+                                  f"{genofile_name}_snps.txt -gk > "
+                                  f"{TEMPDIR}/gn2/{k_output_filename}.json")
+
+            os.system(generate_k_command)
+
+            gemma_command = (f"{GEMMA_WRAPPER_COMMAND} --json --input "
+                             f"{TEMPDIR}/gn2/{k_output_filename}.json -- "
+                             f"{GEMMAOPTS} "
+                             f"-a {flat_files('genotype/bimbam')}/"
+                             f"{genofile_name}_snps.txt "
+                             f"-lmm 9 -g {flat_files('genotype/bimbam')}/"
+                             f"{genofile_name}_geno.txt -p "
+                             f"{TEMPDIR}/gn2/{trait_filename}.txt ")
+
+            if covariates != "":
+                gemma_command += (f" -c {flat_files('mapping')}/"
+                                  f"{this_dataset.group.name}"
+                                  f"_covariates.txt > "
+                                  f"{TEMPDIR}/gn2/{gwa_output_filename}.json")
+            else:
+                gemma_command += f" > {TEMPDIR}/gn2/{gwa_output_filename}.json"
+
+        os.system(gemma_command)
     else:
-      gwa_output_filename = output_files
+        gwa_output_filename = output_files
 
     if use_loco == "True":
         marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
         return marker_obs, gwa_output_filename
     else:
-        marker_obs = parse_loco_output(this_dataset, gwa_output_filename, use_loco)
+        marker_obs = parse_loco_output(
+            this_dataset, gwa_output_filename, use_loco)
         return marker_obs, gwa_output_filename
 
+
 def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
     """Generates phenotype file for GEMMA"""
 
-    current_file_data = []
-    with open("{}/gn2/{}.txt".format(TEMPDIR, trait_filename), "w") as outfile:
+    with open(f"{TEMPDIR}/gn2/{trait_filename}.txt", "w") as outfile:
         for value in vals:
             if value == "x":
                 outfile.write("NA\n")
             else:
                 outfile.write(value + "\n")
 
+
 def gen_covariates_file(this_dataset, covariates, samples):
     covariate_list = covariates.split(",")
     covariate_data_object = []
@@ -125,8 +150,6 @@ def gen_covariates_file(this_dataset, covariates, samples):
         trait_ob = create_trait(dataset=dataset_ob,
                                 name=trait_name,
                                 cellid=None)
-
-        #trait_samples = this_dataset.group.all_samples_ordered()
         this_dataset.group.get_samplelist()
         trait_samples = this_dataset.group.samplelist
         trait_sample_data = trait_ob.data
@@ -139,17 +162,20 @@ def gen_covariates_file(this_dataset, covariates, samples):
                     this_covariate_data.append("-9")
         covariate_data_object.append(this_covariate_data)
 
-    with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile:
+    with open((f"{flat_files('mapping')}/"
+               f"{this_dataset.group.name}_covariates.txt"),
+              "w") as outfile:
         for i in range(len(covariate_data_object[0])):
             for this_covariate in covariate_data_object:
                 outfile.write(str(this_covariate[i]) + "\t")
             outfile.write("\n")
 
+
 def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
 
     output_filelist = []
-    with open("{}/gn2/".format(TEMPDIR) + gwa_output_filename + ".json") as data_file:
-       data = json.load(data_file)
+    with open(f"{TEMPDIR}/gn2/{gwa_output_filename}.json") as data_file:
+        data = json.load(data_file)
 
     files = data['files']
     for file in files:
@@ -160,10 +186,8 @@ def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
     marker_obs = []
     previous_chr = 0
 
-    no_results = False
     for this_file in output_filelist:
         if not os.path.isfile(this_file):
-            no_results = True
             break
         with open(this_file) as output_file:
             for line in output_file:
@@ -188,10 +212,8 @@ def parse_loco_output(this_dataset, gwa_output_filename, loco="True"):
                     marker['additive'] = float(line.split("\t")[7])
                     if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
                         marker['lod_score'] = 0
-                        #marker['lrs_value'] = 0
                     else:
                         marker['lod_score'] = -math.log10(marker['p_value'])
-                        #marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
                     marker_obs.append(marker)
 
                     included_markers.append(line.split("\t")[1])