From 75bcab50abcf29c8a5f98a9f17b37457ac433d3b Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 27 Apr 2021 21:37:12 +0000
Subject: Stopped using the scanone function pointer when doing R/qtl mapping,
 since the results are not converted into a Python object in a way that
 preserves marker names (which is important because pseudomarkers can be
 added)

Instead the marker names are extracted from the scanone results using R immediately after they're generated, and then passed to process_rqtl_results
---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 39 ++++++++++++-----------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 4117a0e5..f593ae91 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -78,6 +78,7 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
         cross_object = calc_genoprob(cross_object)
     else:
         cross_object = calc_genoprob(cross_object, step=5, stepwidth="max")
+
     logger.info("after calc_genoprob");
     pheno_string = sanitize_rqtl_phenotype(vals)
     logger.info("phenostring done");
@@ -110,11 +111,13 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
         return process_pair_scan_results(result_data_frame)
     else:
         if do_control == "true" or cofactors != "":
-            logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method)
-            ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")')
+            ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', addcovar=all_covars, model='{model}', method='{method}')")
+            result_data_frame = ro.r("qtl_results")
         else:
-            logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
+            ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', model='{model}', method='{method}')")
+            result_data_frame = ro.r("qtl_results")
 
+        marker_names = np.asarray(ro.r("row.names(qtl_results)"))
         if num_perm > 0 and permCheck == "ON":                                                                   # Do permutation (if requested by user)
             if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping
                 cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list)
@@ -129,9 +132,10 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
                     perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
 
             perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)          # Functions that sets the thresholds for the webinterface
-            return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species)
+
+            return perm_output, suggestive, significant, process_rqtl_results(marker_names, result_data_frame, dataset.group.species)
         else:
-            return process_rqtl_results(result_data_frame, dataset.group.species)
+            return process_rqtl_results(marker_names, result_data_frame, dataset.group.species)
 
 def generate_cross_from_rdata(dataset):
     rdata_location  = locate(dataset.group.name + ".RData", "genotype/rdata")
@@ -144,7 +148,6 @@ def generate_cross_from_rdata(dataset):
     """ % (rdata_location))
 
 def generate_cross_from_geno(dataset, scale_units):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
-
     ro.r("""
        trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
        getGenoCode <- function(header, name = 'unk'){
@@ -376,9 +379,8 @@ def process_pair_scan_results(result):
 
 def process_rqtl_perm_results(num_perm, results):
     perm_vals = []
-    for line in str(results).split("\n")[1:(num_perm+1)]:
-        #print("R/qtl permutation line:", line.split())
-        perm_vals.append(float(line.split()[1]))
+    for item in results:
+        perm_vals.append(item)
 
     perm_output = perm_vals
     suggestive = np.percentile(np.array(perm_vals), 67)
@@ -386,20 +388,21 @@ def process_rqtl_perm_results(num_perm, results):
 
     return perm_output, suggestive, significant
 
-def process_rqtl_results(result, species_name):        # TODO: how to make this a one liner and not copy the stuff in a loop
+def process_rqtl_results(marker_names, results, species_name):        # TODO: how to make this a one liner and not copy the stuff in a loop
     qtl_results = []
-    output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
 
-    for i, line in enumerate(result.iter_row()):
+    for i, line in enumerate(results):
         marker = {}
-        marker['name'] = result.rownames[i]
-        if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file
+        marker['name'] = marker_names[i]
+        if species_name == "mouse" and line[0] == 20:
             marker['chr'] = "X"
         else:
-            marker['chr'] = output[i][0]
-        marker['cM'] = output[i][1]
-        marker['Mb'] = output[i][1]
-        marker['lod_score'] = output[i][2]
+            try:
+                marker['chr'] = int(line[0])
+            except:
+                marker['chr'] = line[0]
+        marker['cM'] = marker['Mb'] = line[1]
+        marker['lod_score'] = line[2]
         qtl_results.append(marker)
 
     return qtl_results
\ No newline at end of file
-- 
cgit v1.2.3


From 1845086220e427ed38d1bb2b216dc53de3760d64 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 27 Apr 2021 22:24:11 +0000
Subject: Randomized cross object filename, since I think it throws an error if
 the same file is being written to simultaneously (or being written to while
 it's being read)

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index f593ae91..c70fbbec 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -10,6 +10,7 @@ from base.trait import create_trait
 from base.data_set import create_dataset
 from utility import webqtlUtil
 from utility.tools import locate, TEMPDIR
+from wqflask.marker_regression.gemma_mapping import generate_random_n_string
 from flask import g
 
 import utility.logger
@@ -148,13 +149,17 @@ def generate_cross_from_rdata(dataset):
     """ % (rdata_location))
 
 def generate_cross_from_geno(dataset, scale_units):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+
+    cross_filename = (f"{str(dataset.group.name)}_"
+                      f"{generate_random_n_string(6)}")
+
     ro.r("""
        trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
        getGenoCode <- function(header, name = 'unk'){
          mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
          return(trim(strsplit(header[mat],':')[[1]][2]))
        }
-       GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+       GENOtoCSVR <- function(genotypes = '%s', out = '%s.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
          header = readLines(genotypes, 40)                                                                                 # Assume a geno header is not longer than 40 lines
          toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
          type <- getGenoCode(header, 'type')
@@ -188,7 +193,7 @@ def generate_cross_from_geno(dataset, scale_units):        # TODO: Need to figur
          }
          return(cross)
       }
-    """ % (dataset.group.genofile, scale_units))
+    """ % (dataset.group.genofile, cross_filename, scale_units))
 
 def add_perm_strata(cross, perm_strata):
     col_string = 'c("the_strata")'
-- 
cgit v1.2.3


From 63ca33f75fcb67ba8c3851c60088b62becb5f735 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 27 Apr 2021 22:25:44 +0000
Subject: Convert to array and transpose R/qtl scanone results when not using
 cofactors. For some reason the rows/columns are inverted when converted to a
 python object when doing scanone with cofactors vs without cofactors

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index c70fbbec..cd43577e 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -116,7 +116,7 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec
             result_data_frame = ro.r("qtl_results")
         else:
             ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', model='{model}', method='{method}')")
-            result_data_frame = ro.r("qtl_results")
+            result_data_frame = np.asarray(ro.r("qtl_results")).T
 
         marker_names = np.asarray(ro.r("row.names(qtl_results)"))
         if num_perm > 0 and permCheck == "ON":                                                                   # Do permutation (if requested by user)
-- 
cgit v1.2.3


From 2444d60a93ef7c9900ed9a52877bff0ee08fbfb6 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 6 May 2021 22:54:05 +0000
Subject: Because each permutations value is returned as an array, need to just
 take the first item to convert to numbers

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index cd43577e..b9cb99cc 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -385,7 +385,7 @@ def process_pair_scan_results(result):
 def process_rqtl_perm_results(num_perm, results):
     perm_vals = []
     for item in results:
-        perm_vals.append(item)
+        perm_vals.append(item[0])
 
     perm_output = perm_vals
     suggestive = np.percentile(np.array(perm_vals), 67)
-- 
cgit v1.2.3


From cec8c8870f12a52ac20ffcc2f4a8a05003a13a9f Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 6 May 2021 23:14:22 +0000
Subject: Used a list comprehension for perm_vals as mentioned in Bonface's
 comment

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 072e4530..06eceada 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -431,9 +431,7 @@ def process_pair_scan_results(result):
 
 
 def process_rqtl_perm_results(num_perm, results):
-    perm_vals = []
-    for item in results:
-        perm_vals.append(item[0])
+    perm_vals = item[0] for item in results
 
     perm_output = perm_vals
     suggestive = np.percentile(np.array(perm_vals), 67)
-- 
cgit v1.2.3