Committing some changes for testing covariates

author: zsloan 2020-05-04 13:42:48 -0500
committer: zsloan 2020-05-04 13:42:48 -0500
commit: a1db50c2a09e9666b18aeada21785c86e715b865 (patch)
tree: 212e2961b1ae1c3f4fbbecec7debae8279f3ae26 /wqflask
parent: 5e23e579b731fd150d01511c622ffb6fd9dafdbc (diff)
download: genenetwork2-a1db50c2a09e9666b18aeada21785c86e715b865.tar.gz
3 files changed, 322 insertions, 317 deletions
diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 8c294460..ed63ad92 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -1,311 +1,313 @@
-import rpy2.robjects as ro
-import rpy2.robjects.numpy2ri as np2r
-import numpy as np
-
-from base.webqtlConfig import TMPDIR
-from base.trait import GeneralTrait
-from base.data_set import create_dataset
-from utility import webqtlUtil
-from utility.tools import locate, TEMPDIR
-
-import utility.logger
-logger = utility.logger.getLogger(__name__ )
-
-def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
-    ## Get pointers to some common R functions
-    r_library     = ro.r["library"]                 # Map the library function
-    r_c           = ro.r["c"]                       # Map the c function
-    plot          = ro.r["plot"]                    # Map the plot function
-    png           = ro.r["png"]                     # Map the png function
-    dev_off       = ro.r["dev.off"]                 # Map the device off function
-
-    print(r_library("qtl"))                         # Load R/qtl
-
-    ## Get pointers to some R/qtl functions
-    scanone                    = ro.r["scanone"]               # Map the scanone function
-    scantwo                    = ro.r["scantwo"]               # Map the scantwo function
-    calc_genoprob              = ro.r["calc.genoprob"]         # Map the calc.genoprob function
-
-    crossname = dataset.group.name
-    #try:
-    #    generate_cross_from_rdata(dataset)
-    #    read_cross_from_rdata      = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function
-    #    genofilelocation  = locate(crossname + ".RData", "genotype/rdata")
-    #    cross_object = read_cross_from_rdata(genofilelocation)  # Map the local GENOtoCSVR function
-    #except:
-
-    if mapping_scale == "morgan":
-        scale_units = "cM"
-    else:
-        scale_units = "Mb"
-
-    generate_cross_from_geno(dataset, scale_units)
-    GENOtoCSVR                 = ro.r["GENOtoCSVR"]            # Map the local GENOtoCSVR function
-    crossfilelocation = TMPDIR + crossname + ".cross"
-    if dataset.group.genofile:
-        genofilelocation  = locate(dataset.group.genofile, "genotype")
-    else:
-        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
-    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)      # TODO: Add the SEX if that is available
-
-    if manhattan_plot:
-        cross_object = calc_genoprob(cross_object)
-    else:
-        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
-
-    logger.debug("VAL LEN:", len(vals))
-
-    pheno_string = sanitize_rqtl_phenotype(vals)
-
-    cross_object = add_phenotype(cross_object, pheno_string, "the_pheno")                 # Add the phenotype
-
-    # Scan for QTLs
-    marker_covars = create_marker_covariates(control_marker, cross_object)  # Create the additive covariate markers
-
-    if cofactors != "":
-        cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples)                            # Create the covariates from selected traits
-        ro.r('all_covars <- cbind(marker_covars, trait_covars)')
-    else:
-        ro.r('all_covars <- marker_covars')
-
-    covars = ro.r['all_covars']
-
-    if pair_scan:
-        if do_control == "true":
-            logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16)
-        else:
-            logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
-
-        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
-        png(file=TEMPDIR+pair_scan_filename)
-        plot(result_data_frame)
-        dev_off()
-
-        return process_pair_scan_results(result_data_frame)
-    else:
-        if do_control == "true" or cofactors != "":
-            logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method)
-        else:
-            logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
-
-        if num_perm > 0 and permCheck == "ON":                                                                   # Do permutation (if requested by user)
-            if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping
-                cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list)
-                if do_control == "true" or cofactors != "":
-                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method)
-                else:
-                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method)
-            else:
-                if do_control == "true" or cofactors != "":
-                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method)
-                else:
-                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
-
-            perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)          # Functions that sets the thresholds for the webinterface
-            return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species)
-        else:
-            return process_rqtl_results(result_data_frame, dataset.group.species)
-
-def generate_cross_from_rdata(dataset):
-    rdata_location  = locate(dataset.group.name + ".RData", "genotype/rdata")
-    ro.r("""
-       generate_cross_from_rdata <- function(filename = '%s') {
-           load(file=filename)
-           cross = cunique
-           return(cross)
-       }
-    """ % (rdata_location))
-
-def generate_cross_from_geno(dataset, scale_units):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
-
-    ro.r("""
-       trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
-
-       getGenoCode <- function(header, name = 'unk'){
-         mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
-         return(trim(strsplit(header[mat],':')[[1]][2]))
-       }
-
-       GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
-         header = readLines(genotypes, 40)                                                                                 # Assume a geno header is not longer than 40 lines
-         toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
-
-         type <- getGenoCode(header, 'type')
-         if(type == '4-way'){
-            genocodes <- c('1','2','3','4')
-            genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#', crosstype="4way")
-         } else {
-            genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))             # Get the genotype codes
-            genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
-         }
-         cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
-         if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4))                                                     # If there isn't a phenotype, generate a random one
-         if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
-         outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
-                          c('sex', '', '', sex),                                                                           # Sex phenotype for the mice
-                          cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)]))                    # Genotypes
-         write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
-         require(qtl)
-         if(type == '4-way'){
-           cat('Loading in as 4-WAY\n')
-           cross = read.cross(file=out, 'csvr', genotypes=genocodes)
-           #cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE)                 # Load the created cross file using R/qtl read.cross
-         }else{
-           cat('Loading in as normal\n')
-           cross = read.cross(file=out, 'csvr', genotypes=genocodes)                                                       # Load the created cross file using R/qtl read.cross
-         }
-         if(type == 'riset'){
-           cat('Converting to RISELF\n')
-           cross <- convert2riself(cross)                                                                # If its a RIL, convert to a RIL in R/qtl
-         }
-         return(cross)
-      }
-    """ % (dataset.group.genofile, scale_units))
-
-def add_perm_strata(cross, perm_strata):
-    col_string = 'c("the_strata")'
-    perm_strata_string = "c("
-    for item in perm_strata:
-        perm_strata_string += str(item) + ","
-
-    perm_strata_string = perm_strata_string[:-1] + ")"
-
-    cross = add_phenotype(cross, perm_strata_string, "the_strata")
-
-    strata_ob = pull_var("perm_strata", cross, col_string)
-
-    return cross, strata_ob
-
-def sanitize_rqtl_phenotype(vals):
-    pheno_as_string = "c("
-    for i, val in enumerate(vals):
-        if val == "x":
-            if i < (len(vals) - 1):
-                pheno_as_string +=  "NA,"
-            else:
-                pheno_as_string += "NA"
-        else:
-            if i < (len(vals) - 1):
-                pheno_as_string += str(val) + ","
-            else:
-                pheno_as_string += str(val)
-    pheno_as_string += ")"
-
-    return pheno_as_string
-
-def add_phenotype(cross, pheno_as_string, col_name):
-    ro.globalenv["the_cross"] = cross
-    ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), ' + col_name + ' = '+ pheno_as_string +')')
-    return ro.r["the_cross"]
-
-def pull_var(var_name, cross, var_string):
-    ro.globalenv["the_cross"] = cross
-    ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')')
-
-    return ro.r[var_name]
-
-def add_cofactors(cross, this_dataset, covariates, samples):
-    ro.numpy2ri.activate()
-
-    covariate_list = covariates.split(",")
-    covar_name_string = "c("
-    for i, covariate in enumerate(covariate_list):
-        this_covar_data = []
-        covar_as_string = "c("
-        trait_name = covariate.split(":")[0]
-        dataset_ob = create_dataset(covariate.split(":")[1])
-        trait_ob = GeneralTrait(dataset=dataset_ob,
-                                name=trait_name,
-                                cellid=None)
-
-        this_dataset.group.get_samplelist()
-        trait_samples = this_dataset.group.samplelist
-        trait_sample_data = trait_ob.data
-        for index, sample in enumerate(trait_samples):
-            if sample in samples:
-                if sample in trait_sample_data:
-                    sample_value = trait_sample_data[sample].value
-                    this_covar_data.append(sample_value)
-                else:
-                    this_covar_data.append("NA")
-
-        for j, item in enumerate(this_covar_data):
-            if j < (len(this_covar_data) - 1):
-                covar_as_string += str(item) + ","
-            else:
-                covar_as_string += str(item)
-
-        covar_as_string += ")"
-
-        col_name = "covar_" + str(i)
-        cross = add_phenotype(cross, covar_as_string, col_name)
-
-        if i < (len(covariate_list) - 1):
-            covar_name_string += '"' + col_name + '", '
-        else:
-            covar_name_string += '"' + col_name + '"'
-
-    covar_name_string += ")"
-
-    covars_ob = pull_var("trait_covars", cross, covar_name_string)
-
-    return cross, covars_ob
-
-def create_marker_covariates(control_marker, cross):
-    ro.globalenv["the_cross"] = cross
-    ro.r('genotypes <- pull.geno(the_cross)')                             # Get the genotype matrix
-    userinputS = control_marker.replace(" ", "").split(",")               # TODO: sanitize user input, Never Ever trust a user
-    covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
-    ro.r('covnames <- c(' + covariate_names + ')')
-    ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
-    ro.r('covnames <- covnames[covInGeno]')
-    ro.r("cat('covnames (purged): ', covnames,'\n')")
-    ro.r('marker_covars <- genotypes[,covnames]')                            # Get the covariate matrix by using the marker name as index to the genotype file
-
-    return ro.r["marker_covars"]
-
-def process_pair_scan_results(result):
-    pair_scan_results = []
-
-    result = result[1]
-    output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
-
-    for i, line in enumerate(result.iter_row()):
-        marker = {}
-        marker['name'] = result.rownames[i]
-        marker['chr1'] = output[i][0]
-        marker['Mb'] = output[i][1]
-        marker['chr2'] = int(output[i][2])
-        pair_scan_results.append(marker)
-
-    return pair_scan_results
-
-def process_rqtl_perm_results(num_perm, results):
-    perm_vals = []
-    for line in str(results).split("\n")[1:(num_perm+1)]:
-        #print("R/qtl permutation line:", line.split())
-        perm_vals.append(float(line.split()[1]))
-
-    perm_output = perm_vals
-    suggestive = np.percentile(np.array(perm_vals), 67)
-    significant = np.percentile(np.array(perm_vals), 95)
-
-    return perm_output, suggestive, significant
-
-def process_rqtl_results(result, species_name):        # TODO: how to make this a one liner and not copy the stuff in a loop
-    qtl_results = []
-    output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
-
-    for i, line in enumerate(result.iter_row()):
-        marker = {}
-        marker['name'] = result.rownames[i]
-        if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file
-            marker['chr'] = "X"
-        else:
-            marker['chr'] = output[i][0]
-        marker['cM'] = output[i][1]
-        marker['Mb'] = output[i][1]
-        marker['lod_score'] = output[i][2]
-        qtl_results.append(marker)
-
-    return qtl_results
\ No newline at end of file
+import rpy2.robjects as ro
+import rpy2.robjects.numpy2ri as np2r
+import numpy as np
+
+from base.webqtlConfig import TMPDIR
+from base.trait import GeneralTrait
+from base.data_set import create_dataset
+from utility import webqtlUtil
+from utility.tools import locate, TEMPDIR
+
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
+def run_rqtl_geno(vals, samples, dataset, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
+    ## Get pointers to some common R functions
+    r_library     = ro.r["library"]                 # Map the library function
+    r_c           = ro.r["c"]                       # Map the c function
+    plot          = ro.r["plot"]                    # Map the plot function
+    png           = ro.r["png"]                     # Map the png function
+    dev_off       = ro.r["dev.off"]                 # Map the device off function
+
+    print(r_library("qtl"))                         # Load R/qtl
+
+    ## Get pointers to some R/qtl functions
+    scanone                    = ro.r["scanone"]               # Map the scanone function
+    scantwo                    = ro.r["scantwo"]               # Map the scantwo function
+    calc_genoprob              = ro.r["calc.genoprob"]         # Map the calc.genoprob function
+
+    crossname = dataset.group.name
+    #try:
+    #    generate_cross_from_rdata(dataset)
+    #    read_cross_from_rdata      = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function
+    #    genofilelocation  = locate(crossname + ".RData", "genotype/rdata")
+    #    cross_object = read_cross_from_rdata(genofilelocation)  # Map the local GENOtoCSVR function
+    #except:
+    generate_cross_from_geno(dataset)
+    GENOtoCSVR                 = ro.r["GENOtoCSVR"]            # Map the local GENOtoCSVR function
+    crossfilelocation = TMPDIR + crossname + ".cross"
+    if dataset.group.genofile:
+        genofilelocation  = locate(dataset.group.genofile, "genotype")
+    else:
+        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
+    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)      # TODO: Add the SEX if that is available
+
+    the_version = ro.r["packageVersion('qtl')"]
+    logger.debug("THE R VERSION:", the_version)
+
+    ro.r('save.image(file = "/home/zas1024/gn2-zach/tmp/HET3_cofactor_test2.RData")')
+
+    if manhattan_plot:
+        cross_object = calc_genoprob(cross_object)
+    else:
+        cross_object = calc_genoprob(cross_object, step=1, stepwidth="max")
+
+    pheno_string = sanitize_rqtl_phenotype(vals)
+
+    cross_object = add_phenotype(cross_object, pheno_string, "the_pheno")                 # Add the phenotype
+
+    # Scan for QTLs
+    marker_covars = create_marker_covariates(control_marker, cross_object)  # Create the additive covariate markers
+
+    if cofactors != "":
+        cross_object, trait_covars = add_cofactors(cross_object, dataset, cofactors, samples)                            # Create the covariates from selected traits
+        ro.r('all_covars <- cbind(marker_covars, trait_covars)')
+    else:
+        ro.r('all_covars <- marker_covars')
+
+    covars = ro.r['all_covars']
+
+    if pair_scan:
+        if do_control == "true":
+            logger.info("Using covariate"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method, n_cluster = 16)
+        else:
+            logger.info("No covariates"); result_data_frame = scantwo(cross_object, pheno = "the_pheno", model=model, method=method, n_cluster = 16)
+
+        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
+        png(file=TEMPDIR+pair_scan_filename)
+        plot(result_data_frame)
+        dev_off()
+
+        return process_pair_scan_results(result_data_frame)
+    else:
+        if do_control == "true" or cofactors != "":
+            logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method)
+        else:
+            logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method)
+
+        if num_perm > 0 and permCheck == "ON":                                                                   # Do permutation (if requested by user)
+            if len(perm_strata_list) > 0: #ZS: The strata list would only be populated if "Stratified" was checked on before mapping
+                cross_object, strata_ob = add_perm_strata(cross_object, perm_strata_list)
+                if do_control == "true" or cofactors != "":
+                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), perm_strata = strata_ob, model=model, method=method)
+                else:
+                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, perm_strata = strata_ob, model=model, method=method)
+            else:
+                if do_control == "true" or cofactors != "":
+                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", addcovar = covars, n_perm = int(num_perm), model=model, method=method)
+                else:
+                    perm_data_frame = scanone(cross_object, pheno_col = "the_pheno", n_perm = num_perm, model=model, method=method)
+
+            perm_output, suggestive, significant = process_rqtl_perm_results(num_perm, perm_data_frame)          # Functions that sets the thresholds for the webinterface
+            the_scale = check_mapping_scale(genofilelocation)
+            return perm_output, suggestive, significant, process_rqtl_results(result_data_frame, dataset.group.species), the_scale
+        else:
+            the_scale = check_mapping_scale(genofilelocation)
+            return process_rqtl_results(result_data_frame, dataset.group.species), the_scale
+
+def generate_cross_from_rdata(dataset):
+    rdata_location  = locate(dataset.group.name + ".RData", "genotype/rdata")
+    ro.r("""
+       generate_cross_from_rdata <- function(filename = '%s') {
+           load(file=filename)
+           cross = cunique
+           return(cross)
+       }
+    """ % (rdata_location))
+
+def generate_cross_from_geno(dataset):        # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
+
+    ro.r("""
+       trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
+       getGenoCode <- function(header, name = 'unk'){
+         mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
+         return(trim(strsplit(header[mat],':')[[1]][2]))
+       }
+       GENOtoCSVR <- function(genotypes = '%s', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
+         header = readLines(genotypes, 40)                                                                                 # Assume a geno header is not longer than 40 lines
+         toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
+         type <- getGenoCode(header, 'type')
+         if(type == '4-way'){
+            genocodes <- c('1','2','3','4')
+         } else {
+            genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))                # Get the genotype codes
+         }
+         genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
+         cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
+         if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4))                                                     # If there isn't a phenotype, generate a random one
+         if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
+         outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
+                          c('sex', '', '', sex),                                                                           # Sex phenotype for the mice
+                          cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)]))                          # Genotypes
+         write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
+         require(qtl)
+         cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="4way", convertXdata=FALSE)                 # Load the created cross file using R/qtl read.cross
+         #cross = read.cross(file=out, 'csvr', genotypes=genocodes)                                                         # Load the created cross file using R/qtl read.cross
+         if(type == 'riset') cross <- convert2riself(cross)                                                                # If its a RIL, convert to a RIL in R/qtl
+         return(cross)
+      }
+    """ % (dataset.group.genofile))
+
+def add_perm_strata(cross, perm_strata):
+    col_string = 'c("the_strata")'
+    perm_strata_string = "c("
+    for item in perm_strata:
+        perm_strata_string += str(item) + ","
+
+    perm_strata_string = perm_strata_string[:-1] + ")"
+
+    cross = add_phenotype(cross, perm_strata_string, "the_strata")
+
+    strata_ob = pull_var("perm_strata", cross, col_string)
+
+    return cross, strata_ob
+
+def sanitize_rqtl_phenotype(vals):
+    pheno_as_string = "c("
+    for i, val in enumerate(vals):
+        if val == "x":
+            if i < (len(vals) - 1):
+                pheno_as_string +=  "NA,"
+            else:
+                pheno_as_string += "NA"
+        else:
+            if i < (len(vals) - 1):
+                pheno_as_string += str(val) + ","
+            else:
+                pheno_as_string += str(val)
+    pheno_as_string += ")"
+
+    return pheno_as_string
+
+def add_phenotype(cross, pheno_as_string, col_name):
+    ro.globalenv["the_cross"] = cross
+    ro.r('the_cross$pheno <- cbind(pull.pheno(the_cross), ' + col_name + ' = '+ pheno_as_string +')')
+    return ro.r["the_cross"]
+
+def pull_var(var_name, cross, var_string):
+    ro.globalenv["the_cross"] = cross
+    ro.r(var_name +' <- pull.pheno(the_cross, ' + var_string + ')')
+
+    return ro.r[var_name]
+
+def add_cofactors(cross, this_dataset, covariates, samples):
+    ro.numpy2ri.activate()
+
+    covariate_list = covariates.split(",")
+    covar_name_string = "c("
+    for i, covariate in enumerate(covariate_list):
+        this_covar_data = []
+        covar_as_string = "c("
+        trait_name = covariate.split(":")[0]
+        dataset_ob = create_dataset(covariate.split(":")[1])
+        trait_ob = GeneralTrait(dataset=dataset_ob,
+                                name=trait_name,
+                                cellid=None)
+
+        this_dataset.group.get_samplelist()
+        trait_samples = this_dataset.group.samplelist
+        trait_sample_data = trait_ob.data
+        for index, sample in enumerate(trait_samples):
+            if sample in samples:
+                if sample in trait_sample_data:
+                    sample_value = trait_sample_data[sample].value
+                    this_covar_data.append(sample_value)
+                else:
+                    this_covar_data.append("NA")
+
+        for j, item in enumerate(this_covar_data):
+            if j < (len(this_covar_data) - 1):
+                covar_as_string += str(item) + ","
+            else:
+                covar_as_string += str(item)
+
+        covar_as_string += ")"
+
+        col_name = "covar_" + str(i)
+        cross = add_phenotype(cross, covar_as_string, col_name)
+
+        if i < (len(covariate_list) - 1):
+            covar_name_string += '"' + col_name + '", '
+        else:
+            covar_name_string += '"' + col_name + '"'
+
+    covar_name_string += ")"
+
+    covars_ob = pull_var("trait_covars", cross, covar_name_string)
+
+    return cross, covars_ob
+
+def create_marker_covariates(control_marker, cross):
+    ro.globalenv["the_cross"] = cross
+    ro.r('genotypes <- pull.geno(the_cross)')                             # Get the genotype matrix
+    userinputS = control_marker.replace(" ", "").split(",")               # TODO: sanitize user input, Never Ever trust a user
+    covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS)
+    ro.r('covnames <- c(' + covariate_names + ')')
+    ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
+    ro.r('covnames <- covnames[covInGeno]')
+    ro.r("cat('covnames (purged): ', covnames,'\n')")
+    ro.r('marker_covars <- genotypes[,covnames]')                            # Get the covariate matrix by using the marker name as index to the genotype file
+
+    return ro.r["marker_covars"]
+
+def process_pair_scan_results(result):
+    pair_scan_results = []
+
+    result = result[1]
+    output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+
+    for i, line in enumerate(result.iter_row()):
+        marker = {}
+        marker['name'] = result.rownames[i]
+        marker['chr1'] = output[i][0]
+        marker['Mb'] = output[i][1]
+        marker['chr2'] = int(output[i][2])
+        pair_scan_results.append(marker)
+
+    return pair_scan_results
+
+def process_rqtl_perm_results(num_perm, results):
+    perm_vals = []
+    for line in str(results).split("\n")[1:(num_perm+1)]:
+        #print("R/qtl permutation line:", line.split())
+        perm_vals.append(float(line.split()[1]))
+
+    perm_output = perm_vals
+    suggestive = np.percentile(np.array(perm_vals), 67)
+    significant = np.percentile(np.array(perm_vals), 95)
+
+    return perm_output, suggestive, significant
+
+def process_rqtl_results(result, species_name):        # TODO: how to make this a one liner and not copy the stuff in a loop
+    qtl_results = []
+    output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+
+    for i, line in enumerate(result.iter_row()):
+        marker = {}
+        marker['name'] = result.rownames[i]
+        if species_name == "mouse" and output[i][0] == 20: #ZS: This is awkward, but I'm not sure how to change the 20s to Xs in the RData file
+            marker['chr'] = "X"
+        else:
+            marker['chr'] = output[i][0]
+        marker['cM'] = output[i][1]
+        marker['Mb'] = output[i][1]
+        marker['lod_score'] = output[i][2]
+        qtl_results.append(marker)
+
+    return qtl_results
+
+def check_mapping_scale(genofile_location):
+    scale = "physic"
+    with open(genofile_location, "r") as geno_fh:
+        for line in geno_fh:
+            if line[0] == "@" or line[0] == "#":
+
+                if "@scale" in line:
+                    scale = line.split(":")[1].strip()
+                    break
+                else:
+                    continue
+            else:
+                break
+
+    return scale
\ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index 5f7710ab..8fea295f 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -257,9 +257,13 @@ class RunMapping(object):
             #if start_vars['pair_scan'] == "true":
             #    self.pair_scan = True
             if self.permCheck and self.num_perm > 0:
-                self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+                self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
             else:
-                results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+                results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+            # if self.permCheck and self.num_perm > 0:
+            #     self.perm_output, self.suggestive, self.significant, results= rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+            # else:
+            #     results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
         elif self.mapping_method == "reaper":
             if "startMb" in start_vars: #ZS: Check if first time page loaded, so it can default to ON
                 if "additiveCheck" in start_vars:
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 40e344b8..f73079f8 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -175,10 +175,7 @@ class ShowTrait(object):
             if self.genofiles:
                 self.scales_in_geno = get_genotype_scales(self.genofiles)
             else:
-                self.scales_in_geno = get_genotype_scales(self.dataset.group + ".geno")
-
-            if len(self.scales_in_geno) < 2:
-                hddn['mapping_scale'] = self.scales_in_geno[self.scales_in_geno.keys()[0]][0]
+                self.scales_in_geno = get_genotype_scales(self.dataset.group.name + ".geno")
         else:
             self.scales_in_geno = {}
 
@@ -255,6 +252,8 @@ class ShowTrait(object):
         hddn['compare_traits'] = []
         hddn['export_data'] = ""
         hddn['export_format'] = "excel"
+        if len(self.scales_in_geno) < 2:
+            hddn['mapping_scale'] = self.scales_in_geno[self.scales_in_geno.keys()[0]][0]
 
         # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
         self.hddn = hddn
author	zsloan	2020-05-04 13:42:48 -0500
committer	zsloan	2020-05-04 13:42:48 -0500
commit	a1db50c2a09e9666b18aeada21785c86e715b865 (patch)
tree	212e2961b1ae1c3f4fbbecec7debae8279f3ae26 /wqflask
parent	5e23e579b731fd150d01511c622ffb6fd9dafdbc (diff)
download	genenetwork2-a1db50c2a09e9666b18aeada21785c86e715b865.tar.gz