Quick fix to issue where GEMMA wouldn't be run simultaneously on traits from the same group

author: zsloan 2018-11-04 16:11:34 +0000
committer: zsloan 2018-11-04 16:11:34 +0000
commit: 319879a9daf2c12d3919497d7e752600701e92d3 (patch)
tree: c58148cf13e054470fb9f2b2d235c161eaacc5e6 /wqflask
parent: 1007c54ea32a2208cc1755a9a788a1f84342fd66 (diff)
download: genenetwork2-319879a9daf2c12d3919497d7e752600701e92d3.tar.gz
2 files changed, 20 insertions, 19 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index ffe26190..1644bbf7 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -12,7 +12,7 @@ GEMMAOPTS = "-debug"
 if WEBSERVER_MODE == 'PROD':
   GEMMAOPTS = "-no-check"
 
-def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
+def run_gemma(this_trait, this_dataset, samples, vals, covariates, use_loco, maf=0.01):
     """Generates p-values for each marker using GEMMA"""
 
     if this_dataset.group.genofile != None:
@@ -20,7 +20,8 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
     else:
         genofile_name = this_dataset.group.name
 
-    gen_pheno_txt_file(this_dataset, genofile_name, vals)
+    trait_filename = this_trait.name + "_" + this_dataset.name + "_pheno"
+    gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename)
 
     if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)):
         open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+")
@@ -39,10 +40,10 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
     k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
     gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
     if use_loco == "True":
-        generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
-                                                                                        genofile_name,
-                                                                                        flat_files('genotype/bimbam'),
+        generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
                                                                                         genofile_name,
+                                                                                        TEMPDIR,
+                                                                                        trait_filename,
                                                                                         flat_files('genotype/bimbam'),
                                                                                         genofile_name,
                                                                                         TEMPDIR,
@@ -50,10 +51,10 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
         logger.debug("k_command:" + generate_k_command)
         os.system(generate_k_command)
 
-        gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (flat_files('genotype/bimbam'),
+        gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- ' % (TEMPDIR, k_output_filename) + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt' % (flat_files('genotype/bimbam'),
                                                                                         genofile_name,
-                                                                                        flat_files('genotype/bimbam'),
-                                                                                        genofile_name)
+                                                                                        TEMPDIR,
+                                                                                        trait_filename)
         if covariates != "":
             gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf %s > %s/gn2/%s.json' % (flat_files('mapping'),
                                                                                                               this_dataset.group.name,
@@ -70,18 +71,18 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
                                                                                                              gwa_output_filename)
 
     else:
-        generate_k_command = GEMMA_COMMAND + ' ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -outdir %s/gn2/ -o %s' % (flat_files('genotype/bimbam'),
-                                                                                        genofile_name,
-                                                                                        flat_files('genotype/bimbam'),
+        generate_k_command = GEMMA_COMMAND + ' ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -gk -outdir %s/gn2/ -o %s' % (flat_files('genotype/bimbam'),
                                                                                         genofile_name,
+                                                                                        TEMPDIR,
+                                                                                        trait_filename,
                                                                                         flat_files('genotype/bimbam'),
                                                                                         genofile_name,
                                                                                         TEMPDIR,
                                                                                         k_output_filename)
-        #generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
+        #generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
         #                                                                                genofile_name,
         #                                                                                flat_files('genotype/bimbam'),
-        #                                                                                genofile_name,
+        #                                                                                trait_filename,
         #                                                                                flat_files('genotype/bimbam'),
         #                                                                                genofile_name,
         #                                                                                TEMPDIR,
@@ -90,10 +91,10 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
         logger.debug("k_command:" + generate_k_command)
         os.system(generate_k_command)
 
-        gemma_command = GEMMA_COMMAND + ' ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/gn2/%s.cXX.txt -lmm 2 -maf %s' % (flat_files('genotype/bimbam'),
-                                                                                        genofile_name,
-                                                                                        flat_files('genotype/bimbam'),
+        gemma_command = GEMMA_COMMAND + ' ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/gn2/%s.txt -a %s/%s_snps.txt -k %s/gn2/%s.cXX.txt -lmm 2 -maf %s' % (flat_files('genotype/bimbam'),
                                                                                         genofile_name,
+                                                                                        TEMPDIR,
+                                                                                        trait_filename,
                                                                                         flat_files('genotype/bimbam'),
                                                                                         genofile_name,
                                                                                         TEMPDIR,
@@ -141,11 +142,11 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco, maf=0.01):
 
     return marker_obs
 
-def gen_pheno_txt_file(this_dataset, genofile_name, vals):
+def gen_pheno_txt_file(this_dataset, genofile_name, vals, trait_filename):
     """Generates phenotype file for GEMMA"""
 
     current_file_data = []
-    with open("{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name), "w") as outfile:
+    with open("{}/gn2/{}.txt".format(TEMPDIR, trait_filename), "w") as outfile:
         for value in vals:
             if value == "x":
                 outfile.write("NA\n")
diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index b604b467..aee3cc2a 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -178,7 +178,7 @@ class RunMapping(object):
             self.score_type = "-log(p)"
             self.manhattan_plot = True
             with Bench("Running GEMMA"):
-                marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf)
+                marker_obs = gemma_mapping.run_gemma(self.this_trait, self.dataset, self.samples, self.vals, self.covariates, self.use_loco, self.maf)
             results = marker_obs
         elif self.mapping_method == "rqtl_plink":
             results = self.run_rqtl_plink()
author	zsloan	2018-11-04 16:11:34 +0000
committer	zsloan	2018-11-04 16:11:34 +0000
commit	319879a9daf2c12d3919497d7e752600701e92d3 (patch)
tree	c58148cf13e054470fb9f2b2d235c161eaacc5e6 /wqflask
parent	1007c54ea32a2208cc1755a9a788a1f84342fd66 (diff)
download	genenetwork2-319879a9daf2c12d3919497d7e752600701e92d3.tar.gz