Merge pull request #565 from zsloan/bug/fix_rqtl_covariates

Stopped using the scanone function pointer when doing R/qtl mapping, …
author: zsloan 2021-05-06 18:14:49 -0500
committer: GitHub 2021-05-06 18:14:49 -0500
commit: 4956330a2a67a4b12c91ace7ef5e05662034cf89 (patch)
tree: bfd200e408f4e41da437e6a2c99edc004f37cbfc /wqflask
parent: 3c430082b767a29c3e35cb03e68c1b22373ad353 (diff)
parent: cec8c8870f12a52ac20ffcc2f4a8a05003a13a9f (diff)
download: genenetwork2-4956330a2a67a4b12c91ace7ef5e05662034cf89.tar.gz
1 files changed, 27 insertions, 26 deletions
diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 1fa3dffe..06eceada 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -10,6 +10,7 @@ from base.trait import create_trait
 from base.data_set import create_dataset
 from utility import webqtlUtil
 from utility.tools import locate, TEMPDIR
+from wqflask.marker_regression.gemma_mapping import generate_random_n_string
 from flask import g
 
 import utility.logger
@@ -84,6 +85,7 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
     else:
         cross_object = calc_genoprob(cross_object, step=5, stepwidth="max")
     logger.info("after calc_genoprob")
+
     pheno_string = sanitize_rqtl_phenotype(vals)
     logger.info("phenostring done")
     names_string = sanitize_rqtl_names(samples)
@@ -125,13 +127,13 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
     else:
         if do_control == "true" or cofactors != "":
             logger.info("Using covariate")
-            result_data_frame = scanone(
-                cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method)
-            ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")')
+            ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', addcovar=all_covars, model='{model}', method='{method}')")
+            result_data_frame = ro.r("qtl_results")
         else:
-            logger.info("No covariates")
-            result_data_frame = scanone(
-                cross_object, pheno="the_pheno", model=model, method=method)
+            ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', model='{model}', method='{method}')")
+            result_data_frame = np.asarray(ro.r("qtl_results")).T
+
+        marker_names = np.asarray(ro.r("row.names(qtl_results)"))
 
         # Do permutation (if requested by user)
         if num_perm > 0 and permCheck == "ON":
@@ -139,6 +141,7 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
             if len(perm_strata_list) > 0:
                 cross_object, strata_ob = add_perm_strata(
                     cross_object, perm_strata_list)
+
                 if do_control == "true" or cofactors != "":
                     perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int(
                         num_perm), perm_strata=strata_ob, model=model, method=method)
@@ -156,9 +159,9 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
             # Functions that sets the thresholds for the webinterface
             perm_output, suggestive, significant = process_rqtl_perm_results(
                 num_perm, perm_data_frame)
-            return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species)
+            return perm_output, suggestive, significant, process_rqtl_results(marker_names, result_data_frame, dataset.group.species)
         else:
-            return process_rqtl_results(result_data_frame, dataset.group.species)
+            return process_rqtl_results(marker_names, result_data_frame, dataset.group.species)
 
 
 def generate_cross_from_rdata(dataset):
@@ -175,13 +178,16 @@ def generate_cross_from_rdata(dataset):
 # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
 def generate_cross_from_geno(dataset, scale_units):
 
+    cross_filename = (f"{str(dataset.group.name)}_"
+                      f"{generate_random_n_string(6)}")
+
     ro.r("""
        trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
        getGenoCode <- function(header, name = 'unk'){
          mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
          return(trim(strsplit(header[mat],':')[[1]][2]))
        }
-       GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+       GENOtoCSVR <- function(genotypes = '%s', out = '%s.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
          header = readLines(genotypes, 40)                                                                                 # Assume a geno header is not longer than 40 lines
          toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
          type <- getGenoCode(header, 'type')
@@ -215,7 +221,7 @@ def generate_cross_from_geno(dataset, scale_units):
          }
          return(cross)
       }
-    """ % (dataset.group.genofile, scale_units))
+    """ % (dataset.group.genofile, cross_filename, scale_units))
 
 
 def add_perm_strata(cross, perm_strata):
@@ -425,10 +431,7 @@ def process_pair_scan_results(result):
 
 
 def process_rqtl_perm_results(num_perm, results):
-    perm_vals = []
-    for line in str(results).split("\n")[1:(num_perm + 1)]:
-        #print("R/qtl permutation line:", line.split())
-        perm_vals.append(float(line.split()[1]))
+    perm_vals = item[0] for item in results
 
     perm_output = perm_vals
     suggestive = np.percentile(np.array(perm_vals), 67)
@@ -437,23 +440,21 @@ def process_rqtl_perm_results(num_perm, results):
     return perm_output, suggestive, significant
 
 
-# TODO: how to make this a one liner and not copy the stuff in a loop
-def process_rqtl_results(result, species_name):
+def process_rqtl_results(marker_names, results, species_name):        # TODO: how to make this a one liner and not copy the stuff in a loop
     qtl_results = []
-    output = [tuple([result[j][i] for j in range(result.ncol)])
-              for i in range(result.nrow)]
 
-    for i, line in enumerate(result.iter_row()):
+    for i, line in enumerate(results):
         marker = {}
-        marker['name'] = result.rownames[i]
-        # ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file
-        if species_name == "mouse" and output[i][0] == 20:
+        marker['name'] = marker_names[i]
+        if species_name == "mouse" and line[0] == 20:
             marker['chr'] = "X"
         else:
-            marker['chr'] = output[i][0]
-        marker['cM'] = output[i][1]
-        marker['Mb'] = output[i][1]
-        marker['lod_score'] = output[i][2]
+            try:
+                marker['chr'] = int(line[0])
+            except:
+                marker['chr'] = line[0]
+        marker['cM'] = marker['Mb'] = line[1]
+        marker['lod_score'] = line[2]
         qtl_results.append(marker)
 
     return qtl_results
author	zsloan	2021-05-06 18:14:49 -0500
committer	GitHub	2021-05-06 18:14:49 -0500
commit	4956330a2a67a4b12c91ace7ef5e05662034cf89 (patch)
tree	bfd200e408f4e41da437e6a2c99edc004f37cbfc /wqflask
parent	3c430082b767a29c3e35cb03e68c1b22373ad353 (diff)
parent	cec8c8870f12a52ac20ffcc2f4a8a05003a13a9f (diff)
download	genenetwork2-4956330a2a67a4b12c91ace7ef5e05662034cf89.tar.gz