Added function for getting trait data type, plus added mapping scale changes to Danny's partially fixed R/qtl code itp_covariate_testing

author: zsloan 2020-05-06 13:22:41 -0500
committer: zsloan 2020-05-06 13:22:41 -0500
commit: 8a2f61a999e6dc653a9ca2de802137a1a4107e35 (patch)
tree: e5cb1cdb12fa0c676c1f9d4c8ecb26952f8a7cb1
parent: dc8254552a38201c968b92e88c4d243209dce108 (diff)
download: genenetwork2-itp_covariate_testing.tar.gz
1 files changed, 37 insertions, 25 deletions
diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index ad3a4cb7..dd35d89a 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -1,6 +1,9 @@
 import rpy2.robjects as ro
 import rpy2.robjects.numpy2ri as np2r
 import numpy as np
+import json
+
+from flask import g
 
 from base.webqtlConfig import TMPDIR
 from base.trait import GeneralTrait
@@ -11,7 +14,7 @@ from utility.tools import locate, TEMPDIR
 import utility.logger
 logger = utility.logger.getLogger(__name__ )
 
-def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
+def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
     logger.info("Start run_rqtl_geno");
     ## Get pointers to some common R functions
     r_library     = ro.r["library"]                 # Map the library function
@@ -36,7 +39,13 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe
     #    genofilelocation  = locate(crossname + ".RData", "genotype/rdata")
     #    cross_object = read_cross_from_rdata(genofilelocation)  # Map the local GENOtoCSVR function
     #except:
-    generate_cross_from_geno(dataset)
+
+    if mapping_scale == "morgan":
+        scale_units = "cM"
+    else:
+        scale_units = "Mb"
+
+    generate_cross_from_geno(dataset, scale_units)
     GENOtoCSVR                 = ro.r["GENOtoCSVR"]            # Map the local GENOtoCSVR function
     crossfilelocation = TMPDIR + crossname + ".cross"
     if dataset.group.genofile:
@@ -104,11 +113,9 @@ def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, pe
                     perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
 
             perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)          # Functions that sets the thresholds for the webinterface
-            the_scale = check_mapping_scale(genofilelocation)
-            return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species), the_scale
+            return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species)
         else:
-            the_scale = check_mapping_scale(genofilelocation)
-            return process_rqtl_results(result_data_frame, dataset.group.species), the_scale
+            return process_rqtl_results(result_data_frame, dataset.group.species)
 
 def generate_cross_from_rdata(dataset):
     rdata_location  = locate(dataset.group.name + ".RData", "genotype/rdata")
@@ -120,7 +127,7 @@ def generate_cross_from_rdata(dataset):
        }
     """ % (rdata_location))
 
-def generate_cross_from_geno(dataset):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+def generate_cross_from_geno(dataset, scale_units):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
 
     ro.r("""
        trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
@@ -135,7 +142,7 @@ def generate_cross_from_geno(dataset):        # TODO: Need to figure out why som
          if(type == '4-way'){
             genocodes <- c('1','2','3','4')
          } else {
-            genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))                # Get the genotype codes
+            genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))             # Get the genotype codes
          }
          genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
          cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
@@ -143,14 +150,23 @@ def generate_cross_from_geno(dataset):        # TODO: Need to figure out why som
          if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
          outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
                           c('sex', '', '', sex),                                                                           # Sex phenotype for the mice
-                          cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)]))                          # Genotypes
+                          cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)]))                          # Genotypes
          write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
          require(qtl)
-         cross = read.cross(file=out, 'csvr', genotypes=genocodes)                                                         # Load the created cross file using R/qtl read.cross
-         if(type == 'riset') cross <- convert2riself(cross)                                                                # If its a RIL, convert to a RIL in R/qtl
+         if(type == '4-way'){
+           cat('Loading in as 4-WAY\n')
+           cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE)                 # Load the created cross file using R/qtl read.cross
+         }else{
+           cat('Loading in as normal\n')
+           cross = read.cross(file=out, 'csvr', genotypes=genocodes)                                                       # Load the created cross file using R/qtl read.cross
+         }
+         if(type == 'riset'){
+           cat('Converting to RISELF\n')
+           cross <- convert2riself(cross)                                                                # If its a RIL, convert to a RIL in R/qtl
+         }
          return(cross)
       }
-    """ % (dataset.group.genofile))
+    """ % (dataset.group.genofile, scale_units))
 
 def add_perm_strata(cross, perm_strata):
     col_string = 'c("the_strata")'
@@ -324,18 +340,14 @@ def process_rqtl_results(result, species_name):        # TODO: how to make this
 
     return qtl_results
 
-def check_mapping_scale(genofile_location):
-    scale = "physic"
-    with open(genofile_location, "r") as geno_fh:
-        for line in geno_fh:
-            if line[0] == "@" or line[0] == "#":
+def get_trait_data_type(trait_db_string):
+    # Get a trait's type (numeric, categorical, etc) from the DB
+    the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'"
+    results_json = g.db.execute(the_query).fetchone()
 
-                if "@scale" in line:
-                    scale = line.split(":")[1].strip()
-                    break
-                else:
-                    continue
-            else:
-                break
+    results_ob = json.loads(results_json[0])
 
-    return scale
\ No newline at end of file
+    if trait_db_string in results_ob:
+        return results_ob[trait_db_string]
+    else:
+        return "numeric"
author	zsloan	2020-05-06 13:22:41 -0500
committer	zsloan	2020-05-06 13:22:41 -0500
commit	8a2f61a999e6dc653a9ca2de802137a1a4107e35 (patch)
tree	e5cb1cdb12fa0c676c1f9d4c8ecb26952f8a7cb1
parent	dc8254552a38201c968b92e88c4d243209dce108 (diff)
download	genenetwork2-itp_covariate_testing.tar.gz