From c2fac869f5ac1a398677d5646d34a1a0704e29e4 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 20 May 2021 21:44:12 +0000
Subject: Re-implemented R/qtl to get the results from the GN3 API

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 569 +++++-----------------
 wqflask/wqflask/marker_regression/run_mapping.py  |   6 +-
 2 files changed, 135 insertions(+), 440 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 1c8477bf..3f4899b0 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -1,460 +1,155 @@
-import rpy2.robjects as ro
-import rpy2.robjects.numpy2ri as np2r
-import numpy as np
-import json
-
-from flask import g
+import csv
+import hashlib
+import io
+import requests
+import shutil
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import TextIO
 
 from base.webqtlConfig import TMPDIR
 from base.trait import create_trait
-from base.data_set import create_dataset
-from utility import webqtlUtil
-from utility.tools import locate, TEMPDIR
-from wqflask.marker_regression.gemma_mapping import generate_random_n_string
-from flask import g
+from utility.tools import locate
 
 import utility.logger
 logger = utility.logger.getLogger(__name__)
 
-# Get a trait's type (numeric, categorical, etc) from the DB
-
-
-def get_trait_data_type(trait_db_string):
-    logger.info("get_trait_data_type")
-    the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'"
-    logger.info("the_query done")
-    results_json = g.db.execute(the_query).fetchone()
-    logger.info("the_query executed")
-    results_ob = json.loads(results_json[0])
-    logger.info("json results loaded")
-    if trait_db_string in results_ob:
-        logger.info("found")
-        return results_ob[trait_db_string]
-    else:
-        logger.info("not found")
-        return "numeric"
-
+GN3_RQTL_URL = "http://localhost:8086/api/rqtl/compute"
+GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp"
 
-# Run qtl mapping using R/qtl
-def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
-    logger.info("Start run_rqtl_geno")
-    # Get pointers to some common R functions
-    r_library = ro.r["library"]                 # Map the library function
-    r_c = ro.r["c"]                       # Map the c function
-    plot = ro.r["plot"]                    # Map the plot function
-    png = ro.r["png"]                     # Map the png function
-    dev_off = ro.r["dev.off"]                 # Map the device off function
+def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
+    """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)"""
 
-    print((r_library("qtl")))                         # Load R/qtl
-
-    logger.info("QTL library loaded")
-
-    # Get pointers to some R/qtl functions
-    scanone = ro.r["scanone"]               # Map the scanone function
-    scantwo = ro.r["scantwo"]               # Map the scantwo function
-    # Map the calc.genoprob function
-    calc_genoprob = ro.r["calc.genoprob"]
-
-    crossname = dataset.group.name
-    # try:
-    #    generate_cross_from_rdata(dataset)
-    #    read_cross_from_rdata      = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function
-    #    genofilelocation  = locate(crossname + ".RData", "genotype/rdata")
-    #    cross_object = read_cross_from_rdata(genofilelocation)  # Map the local GENOtoCSVR function
-    # except:
-
-    if mapping_scale == "morgan":
-        scale_units = "cM"
-    else:
-        scale_units = "Mb"
-
-    generate_cross_from_geno(dataset, scale_units)
-    # Map the local GENOtoCSVR function
-    GENOtoCSVR = ro.r["GENOtoCSVR"]
-    crossfilelocation = TMPDIR + crossname + ".cross"
+    pheno_file = write_phenotype_file(trait_name, samples, vals, cofactors)
     if dataset.group.genofile:
-        genofilelocation = locate(dataset.group.genofile, "genotype")
-    else:
-        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
-    logger.info("Going to create a cross from geno")
-    # TODO: Add the SEX if that is available
-    cross_object = GENOtoCSVR(genofilelocation, crossfilelocation)
-    logger.info("before calc_genoprob")
-    if manhattan_plot:
-        cross_object = calc_genoprob(cross_object)
+        geno_file = locate(dataset.group.genofile, "genotype")
     else:
-        cross_object = calc_genoprob(cross_object, step=5, stepwidth="max")
-    logger.info("after calc_genoprob")
-
-    pheno_string = sanitize_rqtl_phenotype(vals)
-    logger.info("phenostring done")
-    names_string = sanitize_rqtl_names(samples)
-    logger.info("sanitized pheno and names")
-    # Add the phenotype
-    cross_object = add_phenotype(cross_object, pheno_string, "the_pheno")
-    # Add the phenotype
-    cross_object = add_names(cross_object, names_string, "the_names")
-    logger.info("Added pheno and names")
-    # Create the additive covariate markers
-    marker_covars = create_marker_covariates(control_marker, cross_object)
-    logger.info("Marker covars done")
-    if cofactors != "":
-        logger.info("Cofactors: " + cofactors)
-        # Create the covariates from selected traits
-        cross_object, trait_covars = add_cofactors(
-            cross_object, dataset, cofactors, samples)
-        ro.r('all_covars <- cbind(marker_covars, trait_covars)')
-    else:
-        ro.r('all_covars <- marker_covars')
-    covars = ro.r['all_covars']
-    # DEBUG to save the session object to file
-    if pair_scan:
-        if do_control == "true":
-            logger.info("Using covariate")
-            result_data_frame = scantwo(
-                cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method, n_cluster=16)
-        else:
-            logger.info("No covariates")
-            result_data_frame = scantwo(
-                cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16)
-
-        pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
-        png(file=TEMPDIR + pair_scan_filename)
-        plot(result_data_frame)
-        dev_off()
-
-        return process_pair_scan_results(result_data_frame)
-    else:
-        if do_control == "true" or cofactors != "":
-            logger.info("Using covariate")
-            ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', addcovar=all_covars, model='{model}', method='{method}')")
-            result_data_frame = ro.r("qtl_results")
-        else:
-            ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', model='{model}', method='{method}')")
-            result_data_frame = np.asarray(ro.r("qtl_results")).T
+        geno_file = locate(dataset.group.name + ".geno", "genotype")
 
-        marker_names = np.asarray(ro.r("row.names(qtl_results)"))
+    post_data = {
+        "pheno_file": pheno_file,
+        "geno_file": geno_file,
+        "model": model,
+        "method": method,
+        "nperm": num_perm,
+        "scale": mapping_scale
+    }
 
-        # Do permutation (if requested by user)
-        if num_perm > 0 and permCheck == "ON":
-            # ZS: The strata list would only be populated if "Stratified" was checked on before mapping
-            if len(perm_strata_list) > 0:
-                cross_object, strata_ob = add_perm_strata(
-                    cross_object, perm_strata_list)
-
-                if do_control == "true" or cofactors != "":
-                    perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int(
-                        num_perm), perm_strata=strata_ob, model=model, method=method)
-                else:
-                    perm_data_frame = scanone(
-                        cross_object, pheno_col="the_pheno", n_perm=num_perm, perm_strata=strata_ob, model=model, method=method)
-            else:
-                if do_control == "true" or cofactors != "":
-                    perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int(
-                        num_perm), model=model, method=method)
-                else:
-                    perm_data_frame = scanone(
-                        cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method)
-
-            # Functions that sets the thresholds for the webinterface
-            perm_output, suggestive, significant = process_rqtl_perm_results(
-                num_perm, perm_data_frame)
-            return perm_output, suggestive, significant, process_rqtl_results(marker_names, result_data_frame, dataset.group.species)
-        else:
-            return process_rqtl_results(marker_names, result_data_frame, dataset.group.species)
-
-
-def generate_cross_from_rdata(dataset):
-    rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata")
-    ro.r("""
-       generate_cross_from_rdata <- function(filename = '%s') {
-           load(file=filename)
-           cross = cunique
-           return(cross)
-       }
-    """ % (rdata_location))
-
-
-# TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
-def generate_cross_from_geno(dataset, scale_units):
-
-    cross_filename = (f"{str(dataset.group.name)}_"
-                      f"{generate_random_n_string(6)}")
-
-    ro.r("""
-       trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
-       getGenoCode <- function(header, name = 'unk'){
-         mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
-         return(trim(strsplit(header[mat],':')[[1]][2]))
-       }
-       GENOtoCSVR <- function(genotypes = '%s', out = '%s.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
-         header = readLines(genotypes, 40)                                                                                 # Assume a geno header is not longer than 40 lines
-         toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
-         type <- getGenoCode(header, 'type')
-         if(type == '4-way'){
-            genocodes <- NULL
-         } else {
-            genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))             # Get the genotype codes
-         }
-         genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
-         cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
-         if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4))                                                     # If there isn't a phenotype, generate a random one
-         if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
-         outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
-                          c('sex', '', '', sex),                                                                           # Sex phenotype for the mice
-                          cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)]))                          # Genotypes
-         write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
-         require(qtl)
-         if(type == '4-way'){
-           cat('Loading in as 4-WAY\n')
-           cross = read.cross(file=out, 'csvr', genotypes=NULL, crosstype="4way")                                         # Load the created cross file using R/qtl read.cross
-         }else if(type == 'f2'){
-           cat('Loading in as F2\n')
-           cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="f2")                                       # Load the created cross file using R/qtl read.cross
-         }else{
-           cat('Loading in as normal\n')
-           cross = read.cross(file=out, 'csvr', genotypes=genocodes)                                                       # Load the created cross file using R/qtl read.cross
-         }
-         if(type == 'riset'){
-           cat('Converting to RISELF\n')
-           cross <- convert2riself(cross)                                                                # If its a RIL, convert to a RIL in R/qtl
-         }
-         return(cross)
-      }
-    """ % (dataset.group.genofile, cross_filename, scale_units))
+    if do_control == "true" and control_marker:
+        post_data["control_marker"] = control_marker
 
+    if not manhattan_plot:
+        post_data["interval"] = True
+    if cofactors:
+        post_data["addcovar"] = True
 
-def add_perm_strata(cross, perm_strata):
-    col_string = 'c("the_strata")'
-    perm_strata_string = "c("
-    for item in perm_strata:
-        perm_strata_string += str(item) + ","
+    out_file = requests.post(GN3_RQTL_URL, data=post_data).json()['output_file']
 
-    perm_strata_string = perm_strata_string[:-1] + ")"
+    return process_rqtl_results(out_file)
 
-    cross = add_phenotype(cross, perm_strata_string, "the_strata")
 
-    strata_ob = pull_var("perm_strata", cross, col_string)
+def process_rqtl_results(out_file: str) -> List:
+    """Given the output filename, read in results and
+    return as a list of dictionaries representing each
+    marker
 
-    return cross, strata_ob
+    """
 
-
-def sanitize_rqtl_phenotype(vals):
-    pheno_as_string = "c("
-    for i, val in enumerate(vals):
-        if val == "x":
-            if i < (len(vals) - 1):
-                pheno_as_string += "NA,"
+    marker_obs = []
+    # Later I should probably redo this using csv.read to avoid the
+    # awkwardness with removing quotes with [1:-1]
+    with open(GN3_TMP_PATH + "/output/" + out_file, "r") as the_file:
+        for line in the_file:
+            line_items = line.split(",")
+            if line_items[1][1:-1] == "chr" or not line_items:
+                continue
             else:
-                pheno_as_string += "NA"
+                # Convert chr to int if possible
+                try:
+                    the_chr = int(line_items[1][1:-1])
+                except:
+                    the_chr = line_items[1][1:-1]
+                this_marker = {
+                    "name": line_items[0][1:-1],
+                    "chr": the_chr,
+                    "cM": float(line_items[2]),
+                    "Mb": float(line_items[2]),
+                    "lod_score": float(line_items[3])
+                }
+                marker_obs.append(this_marker)
+
+    return marker_obs
+
+def get_hash_of_textio(the_file: TextIO) -> str:
+    """Given a StringIO, return the hash of its contents"""
+
+    the_file.seek(0)
+    hash_of_file = hashlib.md5(the_file.read().encode()).hexdigest()
+
+    return hash_of_file
+
+
+def write_phenotype_file(trait_name: str,
+                         samples: List[str],
+                         vals: List,
+                         dataset_ob,
+                         cofactors: Optional[str] = None) -> TextIO:
+    """Given trait name, sample list, value list, dataset ob, and optional string
+    representing cofactors, return the file's full path/name
+
+    """
+
+    cofactor_data = cofactors_to_dict(cofactors, dataset_ob, samples)
+
+    pheno_file = io.StringIO()
+    writer = csv.writer(pheno_file, delimiter="\t", quoting=csv.QUOTE_NONE)
+
+    header_row = ["Samples", trait_name]
+    header_row += [cofactor for cofactor in cofactor_data]
+
+    writer.writerow(header_row)
+    for i, sample in enumerate(samples):
+        this_row = [sample]
+        if vals[i] != "x":
+            this_row.append(vals[i])
         else:
-            if i < (len(vals) - 1):
-                pheno_as_string += str(val) + ","
-            else:
-                pheno_as_string += str(val)
-    pheno_as_string += ")"
-
-    return pheno_as_string
-
-
-def sanitize_rqtl_names(vals):
-    pheno_as_string = "c("
-    for i, val in enumerate(vals):
-        if val == "x":
-            if i < (len(vals) - 1):
-                pheno_as_string += "NA,"
-            else:
-                pheno_as_string += "NA"
-        else:
-            if i < (len(vals) - 1):
-                pheno_as_string += "'" + str(val) + "',"
-            else:
-                pheno_as_string += "'" + str(val) + "'"
-    pheno_as_string += ")"
-
-    return pheno_as_string
-
-
-def add_phenotype(cross, pheno_as_string, col_name):
-    ro.globalenv["the_cross"] = cross
-    ro.r('pheno <- data.frame(pull.pheno(the_cross))')
-    ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + \
-         ' = as.numeric(' + pheno_as_string + '))')
-    return ro.r["the_cross"]
-
-
-def add_categorical_covar(cross, covar_as_string, i):
-    ro.globalenv["the_cross"] = cross
-    logger.info("cross set")
-    ro.r('covar <- as.factor(' + covar_as_string + ')')
-    logger.info("covar set")
-    ro.r('newcovar <- model.matrix(~covar-1)')
-    logger.info("model.matrix finished")
-    ro.r('cat("new covar columns", ncol(newcovar), "\n")')
-    nCol = ro.r('ncol(newcovar)')
-    logger.info("ncol covar done: " + str(nCol[0]))
-    ro.r('pheno <- data.frame(pull.pheno(the_cross))')
-    logger.info("pheno pulled from cross")
-    nCol = int(nCol[0])
-    logger.info("nCol python int:" + str(nCol))
-    col_names = []
-    # logger.info("loop")
-    for x in range(1, (nCol + 1)):
-        #logger.info("loop" + str(x));
-        col_name = "covar_" + str(i) + "_" + str(x)
-        #logger.info("col_name" + col_name);
-        ro.r('the_cross$pheno <- cbind(pheno, ' + \
-             col_name + ' = newcovar[,' + str(x) + '])')
-        col_names.append(col_name)
-        #logger.info("loop" + str(x) + "done");
-
-    logger.info("returning from add_categorical_covar")
-    return ro.r["the_cross"], col_names
-
-
-def add_names(cross, names_as_string, col_name):
-    ro.globalenv["the_cross"] = cross
-    ro.r('pheno <- data.frame(pull.pheno(the_cross))')
-    ro.r('the_cross$pheno <- cbind(pheno, ' + \
-         col_name + ' = ' + names_as_string + ')')
-    return ro.r["the_cross"]
-
-
-def pull_var(var_name, cross, var_string):
-    ro.globalenv["the_cross"] = cross
-    ro.r(var_name + ' <- pull.pheno(the_cross, ' + var_string + ')')
-
-    return ro.r[var_name]
-
-
-def add_cofactors(cross, this_dataset, covariates, samples):
-    ro.numpy2ri.activate()
-
-    covariate_list = covariates.split(",")
-    covar_name_string = "c("
-    for i, covariate in enumerate(covariate_list):
-        logger.info("Covariate: " + covariate)
-        this_covar_data = []
-        covar_as_string = "c("
-        trait_name = covariate.split(":")[0]
-        dataset_ob = create_dataset(covariate.split(":")[1])
-        trait_ob = create_trait(dataset=dataset_ob,
-                                name=trait_name,
-                                cellid=None)
-
-        this_dataset.group.get_samplelist()
-        trait_samples = this_dataset.group.samplelist
-        trait_sample_data = trait_ob.data
-        for index, sample in enumerate(samples):
-            if sample in trait_samples:
-                if sample in trait_sample_data:
-                    sample_value = trait_sample_data[sample].value
-                    this_covar_data.append(sample_value)
-                else:
-                    this_covar_data.append("NA")
-
-        for j, item in enumerate(this_covar_data):
-            if j < (len(this_covar_data) - 1):
-                covar_as_string += str(item) + ","
-            else:
-                covar_as_string += str(item)
-
-        covar_as_string += ")"
-
-        datatype = get_trait_data_type(covariate)
-        logger.info("Covariate: " + covariate + " is of type: " + datatype)
-        if(datatype == "categorical"):  # Cat variable
-            logger.info("call of add_categorical_covar")
-            cross, col_names = add_categorical_covar(
-                cross, covar_as_string, i)  # Expand and add it to the cross
-            logger.info("add_categorical_covar returned")
-            # Go through the additional covar names
-            for z, col_name in enumerate(col_names):
-                if i < (len(covariate_list) - 1):
-                    covar_name_string += '"' + col_name + '", '
-                else:
-                    if(z < (len(col_names) - 1)):
-                        covar_name_string += '"' + col_name + '", '
+            this_row.append("NA")
+        for cofactor in cofactor_data:
+            this_row.append(cofactor_data[cofactor][i])
+        writer.writerow(this_row)
+
+    hash_of_file = get_hash_of_textio(pheno_file)
+    file_path = TMPDIR + hash_of_file + ".csv"
+
+    with open(file_path, "w") as fd:
+        pheno_file.seek(0)
+        shutil.copyfileobj(pheno_file, fd)
+
+    return file_path
+
+
+def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict:
+    """Given a string of cofactors, the trait being mapped's dataset ob,
+    and list of samples, return cofactor data as a Dict
+
+    """
+    cofactors = {}
+    if cofactors:
+        dataset_ob.group.get_samplelist()
+        sample_list = dataset_ob.group.samplelist
+        for cofactor in cofactors.split(","):
+            cofactor_name, cofactor_dataset = cofactor.split(":")
+            if cofactor_dataset == this_dataset.name:
+                cofactors[cofactor_name] = []
+                trait_ob = create_trait(dataset=dataset_ob,
+                                        name=cofactor_name)
+                sample_data = trait_ob.data
+                for index, sample in enumerate(samples):
+                    #if (sample in sample_list) and (sample in sample_data):
+                    if sample in sample_data:
+                        sample_value = sample_data[sample].value
+                        cofactors[cofactor_name].append(sample_value)
                     else:
-                        covar_name_string += '"' + col_name + '"'
-        else:
-            col_name = "covar_" + str(i)
-            cross = add_phenotype(cross, covar_as_string, col_name)
-            if i < (len(covariate_list) - 1):
-                covar_name_string += '"' + col_name + '", '
-            else:
-                covar_name_string += '"' + col_name + '"'
-
-    covar_name_string += ")"
-    covars_ob = pull_var("trait_covars", cross, covar_name_string)
-    return cross, covars_ob
-
-
-def create_marker_covariates(control_marker, cross):
-    ro.globalenv["the_cross"] = cross
-    # Get the genotype matrix
-    ro.r('genotypes <- pull.geno(the_cross)')
-    # TODO: sanitize user input, Never Ever trust a user
-    userinput_sanitized = control_marker.replace(" ", "").split(",")
-    logger.debug(userinput_sanitized)
-    if len(userinput_sanitized) > 0:
-        covariate_names = ', '.join('"{0}"'.format(w)
-                                    for w in userinput_sanitized)
-        ro.r('covnames <- c(' + covariate_names + ')')
-    else:
-        ro.r('covnames <- c()')
-    ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
-    ro.r('covnames <- covnames[covInGeno]')
-    ro.r("cat('covnames (purged): ', covnames,'\n')")
-    # Get the covariate matrix by using the marker name as index to the genotype file
-    ro.r('marker_covars <- genotypes[,covnames]')
-    # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc
-    return ro.r["marker_covars"]
-
-
-def process_pair_scan_results(result):
-    pair_scan_results = []
-
-    result = result[1]
-    output = [tuple([result[j][i] for j in range(result.ncol)])
-              for i in range(result.nrow)]
-
-    for i, line in enumerate(result.iter_row()):
-        marker = {}
-        marker['name'] = result.rownames[i]
-        marker['chr1'] = output[i][0]
-        marker['Mb'] = output[i][1]
-        marker['chr2'] = int(output[i][2])
-        pair_scan_results.append(marker)
-
-    return pair_scan_results
-
-
-def process_rqtl_perm_results(num_perm, results):
-    perm_vals = [item[0] for item in results]
-
-    perm_output = perm_vals
-    suggestive = np.percentile(np.array(perm_vals), 67)
-    significant = np.percentile(np.array(perm_vals), 95)
-
-    return perm_output, suggestive, significant
-
-
-def process_rqtl_results(marker_names, results, species_name):        # TODO: how to make this a one liner and not copy the stuff in a loop
-    qtl_results = []
-
-    for i, line in enumerate(results):
-        marker = {}
-        marker['name'] = marker_names[i]
-        if species_name == "mouse" and line[0] == 20:
-            marker['chr'] = "X"
-        else:
-            try:
-                marker['chr'] = int(line[0])
-            except:
-                marker['chr'] = line[0]
-        marker['cM'] = marker['Mb'] = line[1]
-        marker['lod_score'] = line[2]
-        qtl_results.append(marker)
-
-    return qtl_results
+                        cofactors[cofactor_name].append("NA")
+    return cofactors
\ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index a3b579ec..be1186c0 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -242,10 +242,10 @@ class RunMapping:
             # if start_vars['pair_scan'] == "true":
             #    self.pair_scan = True
             if self.permCheck and self.num_perm > 0:
-                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
-                    self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl(
+                    self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
             else:
-                results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck,
+                results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck,
                                                      self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
         elif self.mapping_method == "reaper":
             if "startMb" in start_vars:  # ZS: Check if first time page loaded, so it can default to ON
-- 
cgit v1.2.3


From efbe47b58f6e0b9a4b509a28ec8788a1c6dae343 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 21 May 2021 19:43:23 +0000
Subject: run_rqtl now reads in permutation output

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 34 +++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 3f4899b0..c3f047fc 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -8,6 +8,8 @@ from typing import List
 from typing import Optional
 from typing import TextIO
 
+import numpy as np
+
 from base.webqtlConfig import TMPDIR
 from base.trait import create_trait
 from utility.tools import locate
@@ -46,7 +48,11 @@ def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, method, model, p
 
     out_file = requests.post(GN3_RQTL_URL, data=post_data).json()['output_file']
 
-    return process_rqtl_results(out_file)
+    if num_perm > 0:
+        perm_results, suggestive, significant = process_perm_results(out_file)
+        return perm_results, suggestive, significant, process_rqtl_results(out_file)
+    else:
+        return process_rqtl_results(out_file)
 
 
 def process_rqtl_results(out_file: str) -> List:
@@ -63,6 +69,7 @@ def process_rqtl_results(out_file: str) -> List:
         for line in the_file:
             line_items = line.split(",")
             if line_items[1][1:-1] == "chr" or not line_items:
+                # Skip header line
                 continue
             else:
                 # Convert chr to int if possible
@@ -81,6 +88,30 @@ def process_rqtl_results(out_file: str) -> List:
 
     return marker_obs
 
+
+def process_perm_results(out_file: str):
+    """Given base filename, read in R/qtl permutation output and calculate
+    suggestive and significant thresholds
+
+    """
+    perm_results = []
+    with open(GN3_TMP_PATH + "/output/PERM_" + out_file, "r") as the_file:
+        for i, line in enumerate(the_file):
+            if i == 0:
+                # Skip header line
+                continue
+            else:
+                line_items = line.split(",")
+                perm_results.append(float(line_items[1]))
+
+    logger.debug("PERM RESULTS:", perm_results)
+
+    suggestive = np.percentile(np.array(perm_results), 67)
+    significant = np.percentile(np.array(perm_results), 95)
+
+    return perm_results, suggestive, significant
+
+
 def get_hash_of_textio(the_file: TextIO) -> str:
     """Given a StringIO, return the hash of its contents"""
 
@@ -146,7 +177,6 @@ def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict:
                                         name=cofactor_name)
                 sample_data = trait_ob.data
                 for index, sample in enumerate(samples):
-                    #if (sample in sample_list) and (sample in sample_data):
                     if sample in sample_data:
                         sample_value = sample_data[sample].value
                         cofactors[cofactor_name].append(sample_value)
-- 
cgit v1.2.3


From d2edff1a05a81cd2ca703de940ea2bec5bd1dd10 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 21 May 2021 20:32:15 +0000
Subject: Removed a couple unused parameters for run_rqtl

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 2 +-
 wqflask/wqflask/marker_regression/run_mapping.py  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index c3f047fc..d18f6a7b 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -20,7 +20,7 @@ logger = utility.logger.getLogger(__name__)
 GN3_RQTL_URL = "http://localhost:8086/api/rqtl/compute"
 GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp"
 
-def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors):
+def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors):
     """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)"""
 
     pheno_file = write_phenotype_file(trait_name, samples, vals, cofactors)
diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index be1186c0..d727e3ff 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -243,10 +243,10 @@ class RunMapping:
             #    self.pair_scan = True
             if self.permCheck and self.num_perm > 0:
                 self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl(
-                    self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+                    self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.model, self.method, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates)
             else:
-                results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck,
-                                                     self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates)
+                results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.model, self.method,
+                                                     self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates)
         elif self.mapping_method == "reaper":
             if "startMb" in start_vars:  # ZS: Check if first time page loaded, so it can default to ON
                 if "additiveCheck" in start_vars:
-- 
cgit v1.2.3


From 579ec56e488db19e7cd489b906ae6da7cd453239 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Sat, 22 May 2021 22:31:51 +0000
Subject: Rewrote test_rqtl_mapping.py, though haven't done tests for all
 functions yet

---
 .../wqflask/marker_regression/test_rqtl_mapping.py | 76 ++++++++++++----------
 1 file changed, 40 insertions(+), 36 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 91d2c587..5c679c05 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -1,42 +1,46 @@
 import unittest
 from unittest import mock
-from wqflask import app
-from wqflask.marker_regression.rqtl_mapping import get_trait_data_type
-from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_phenotype
-from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_names
+from wqflask.marker_regression.rqtl_mapping import run_rqtl
 
+class AttributeSetter:
+    def __init__(self, obj):
+        for key, val in obj.items():
+            setattr(self, key, val)
+
+class MockGroup(AttributeSetter):
+    def get_samplelist(self):
+        return None
 
 class TestRqtlMapping(unittest.TestCase):
 
-    def setUp(self):
-        self.app_context = app.app_context()
-        self.app_context.push()
-
-    def tearDown(self):
-        self.app_context.pop()
-
-    @mock.patch("wqflask.marker_regression.rqtl_mapping.g")
-    @mock.patch("wqflask.marker_regression.rqtl_mapping.logger")
-    def test_get_trait_data(self, mock_logger, mock_db):
-        """test for getting trait data_type return True"""
-        query_value = """SELECT value FROM TraitMetadata WHERE type='trait_data_type'"""
-        mock_db.db.execute.return_value.fetchone.return_value = [
-            """{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""]
-        results = get_trait_data_type("traid_id")
-        mock_db.db.execute.assert_called_with(query_value)
-        self.assertEqual(results, "fer434f")
-
-    def test_sanitize_rqtl_phenotype(self):
-        """test for sanitizing rqtl phenotype"""
-        vals = ['f', "x", "r", "x", "x"]
-        results = sanitize_rqtl_phenotype(vals)
-        expected_phenotype_string = 'c(f,NA,r,NA,NA)'
-
-        self.assertEqual(results, expected_phenotype_string)
-
-    def test_sanitize_rqtl_names(self):
-        """test for sanitzing rqtl names"""
-        vals = ['f', "x", "r", "x", "x"]
-        expected_sanitized_name = "c('f',NA,'r',NA,NA)"
-        results = sanitize_rqtl_names(vals)
-        self.assertEqual(expected_sanitized_name, results)
+    @mock.patch("wqflask.marker_regression.rqtl_mapping.process_rqtl_results")
+    @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results")
+    @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post")
+    @mock.patch("wqflask.marker_regression.rqtl_mapping.locate")
+    @mock.patch("wqflask.marker_regression.gemma_mapping.write_phenotype_file")
+    def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post, mock_process_perm, mock_process_rqtl):
+        """Test for run_rqtl with permutations > 0"""
+        dataset_group = MockGroup(
+            {"name": "GP1", "genofile": "file_geno"})
+
+        dataset = AttributeSetter({"group": dataset_group})
+
+        mock_write_pheno_file.return_value = "pheno_filename"
+        mock_locate.return_value = "geno_filename"
+
+        mock_post.return_value = "output_filename"
+
+        mock_process_perm.return_value = [[], 3, 4]
+        mock_process_rqtl.return_value = []
+
+        results = run_rqtl(trait_name="the_trait", vals=[], samples=[],
+        dataset=dataset, mapping_scale="cM", model="normal", method="hk",
+        num_perm=5, perm_strata_list=[], do_control="false", control_marker="",
+        manhattan_plot=True, cofactors="")
+
+        mock_write_pheno_file.assert_called_once()
+        mock_locate.assert_called_once()
+        mock_post.assert_called_once()
+        mock_process_perm.assert_called_once()
+        mock_process_rqtl.assert_called_once()
+        self.assertEqual(results, ([], 3, 4, []))
\ No newline at end of file
-- 
cgit v1.2.3


From 1ec1219eae75f1cf9a1b20ea479ea6e2934eaa68 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Sat, 22 May 2021 22:43:58 +0000
Subject: Fixed a couple broken tests

---
 wqflask/tests/unit/wqflask/api/test_mapping.py                    | 2 +-
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/api/test_mapping.py b/wqflask/tests/unit/wqflask/api/test_mapping.py
index b094294a..159c982b 100644
--- a/wqflask/tests/unit/wqflask/api/test_mapping.py
+++ b/wqflask/tests/unit/wqflask/api/test_mapping.py
@@ -58,7 +58,7 @@ class TestMapping(unittest.TestCase):
 
         self.assertEqual(results_2, expected_results)
 
-    @mock.patch("wqflask.api.mapping.rqtl_mapping.run_rqtl_geno")
+    @mock.patch("wqflask.api.mapping.rqtl_mapping.run_rqtl")
     @mock.patch("wqflask.api.mapping.gemma_mapping.run_gemma")
     @mock.patch("wqflask.api.mapping.initialize_parameters")
     @mock.patch("wqflask.api.mapping.retrieve_sample_data")
diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 5c679c05..626869b8 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -17,7 +17,7 @@ class TestRqtlMapping(unittest.TestCase):
     @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.locate")
-    @mock.patch("wqflask.marker_regression.gemma_mapping.write_phenotype_file")
+    @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file")
     def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post, mock_process_perm, mock_process_rqtl):
         """Test for run_rqtl with permutations > 0"""
         dataset_group = MockGroup(
-- 
cgit v1.2.3


From cf9f0bef4454f97270bcacd16080f9d7f6bbb5e6 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Sat, 22 May 2021 22:51:19 +0000
Subject: Fixed the mocked return_value for requests.post to fix broken test

---
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 626869b8..6853c021 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -28,7 +28,8 @@ class TestRqtlMapping(unittest.TestCase):
         mock_write_pheno_file.return_value = "pheno_filename"
         mock_locate.return_value = "geno_filename"
 
-        mock_post.return_value = "output_filename"
+        mock_post.return_value = {"output_file": "output_filename",
+                                  "rqtl_cmd": "the_command"}
 
         mock_process_perm.return_value = [[], 3, 4]
         mock_process_rqtl.return_value = []
-- 
cgit v1.2.3


From 4d15c32e7d86885cffce1720db0df394bd6a02ff Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 25 May 2021 20:19:37 +0000
Subject: Removed code that processes output files, since this functionality was
 moved to GN3

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 67 ++---------------------
 1 file changed, 4 insertions(+), 63 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index d18f6a7b..495678e9 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -46,70 +46,11 @@ def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, n
     if cofactors:
         post_data["addcovar"] = True
 
-    out_file = requests.post(GN3_RQTL_URL, data=post_data).json()['output_file']
-
+    rqtl_output = requests.post(GN3_RQTL_URL, data=post_data).json()
     if num_perm > 0:
-        perm_results, suggestive, significant = process_perm_results(out_file)
-        return perm_results, suggestive, significant, process_rqtl_results(out_file)
+        return rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'], rqtl_output['results']
     else:
-        return process_rqtl_results(out_file)
-
-
-def process_rqtl_results(out_file: str) -> List:
-    """Given the output filename, read in results and
-    return as a list of dictionaries representing each
-    marker
-
-    """
-
-    marker_obs = []
-    # Later I should probably redo this using csv.read to avoid the
-    # awkwardness with removing quotes with [1:-1]
-    with open(GN3_TMP_PATH + "/output/" + out_file, "r") as the_file:
-        for line in the_file:
-            line_items = line.split(",")
-            if line_items[1][1:-1] == "chr" or not line_items:
-                # Skip header line
-                continue
-            else:
-                # Convert chr to int if possible
-                try:
-                    the_chr = int(line_items[1][1:-1])
-                except:
-                    the_chr = line_items[1][1:-1]
-                this_marker = {
-                    "name": line_items[0][1:-1],
-                    "chr": the_chr,
-                    "cM": float(line_items[2]),
-                    "Mb": float(line_items[2]),
-                    "lod_score": float(line_items[3])
-                }
-                marker_obs.append(this_marker)
-
-    return marker_obs
-
-
-def process_perm_results(out_file: str):
-    """Given base filename, read in R/qtl permutation output and calculate
-    suggestive and significant thresholds
-
-    """
-    perm_results = []
-    with open(GN3_TMP_PATH + "/output/PERM_" + out_file, "r") as the_file:
-        for i, line in enumerate(the_file):
-            if i == 0:
-                # Skip header line
-                continue
-            else:
-                line_items = line.split(",")
-                perm_results.append(float(line_items[1]))
-
-    logger.debug("PERM RESULTS:", perm_results)
-
-    suggestive = np.percentile(np.array(perm_results), 67)
-    significant = np.percentile(np.array(perm_results), 95)
-
-    return perm_results, suggestive, significant
+        return rqtl_output['results']
 
 
 def get_hash_of_textio(the_file: TextIO) -> str:
@@ -182,4 +123,4 @@ def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict:
                         cofactors[cofactor_name].append(sample_value)
                     else:
                         cofactors[cofactor_name].append("NA")
-    return cofactors
\ No newline at end of file
+    return cofactors
-- 
cgit v1.2.3


From 666494b49bef48a46145cf415262d532530aba46 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 25 May 2021 20:49:48 +0000
Subject: Fixed/shortened get_categorical_variables function and correctly pass
 them to show_trait template

---
 wqflask/wqflask/show_trait/show_trait.py | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 837c7a54..a02da872 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -279,6 +279,8 @@ class ShowTrait:
         hddn['suggestive'] = 0
         hddn['num_perm'] = 0
         hddn['categorical_vars'] = ""
+        if categorical_var_list:
+            hddn['categorical_vars'] = ",".join(categorical_var_list)
         hddn['manhattan_plot'] = ""
         hddn['control_marker'] = ""
         if not self.temp_trait:
@@ -684,23 +686,13 @@ def get_ncbi_summary(this_trait):
         return None
 
 
-def get_categorical_variables(this_trait, sample_list):
+def get_categorical_variables(this_trait, sample_list) -> list:
     categorical_var_list = []
 
     if len(sample_list.attributes) > 0:
         for attribute in sample_list.attributes:
-            attribute_vals = []
-            for sample_name in list(this_trait.data.keys()):
-                if sample_list.attributes[attribute].name in this_trait.data[sample_name].extra_attributes:
-                    attribute_vals.append(
-                        this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name])
-                else:
-                    attribute_vals.append("N/A")
-            num_distinct = len(set(attribute_vals))
-
-            if num_distinct < 10:
-                categorical_var_list.append(
-                    sample_list.attributes[attribute].name)
+            if len(sample_list.attributes[attribute].distinct_values) < 10:
+                categorical_var_list.append(sample_list.attributes[attribute].name)
 
     return categorical_var_list
 
-- 
cgit v1.2.3


From 92f84ab7c544ac9d7ffdbf302cf35b8b67ef7819 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 19:59:24 +0000
Subject: Add code writing perm_strata to phenotype input file in
 rqtl_mapping.py + fix issue in cofactors_to_dict where two variables were
 both called 'cofactors'

---
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 25 +++++++++++++++--------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py
index 495678e9..09afb8d1 100644
--- a/wqflask/wqflask/marker_regression/rqtl_mapping.py
+++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py
@@ -23,7 +23,7 @@ GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp"
 def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors):
     """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)"""
 
-    pheno_file = write_phenotype_file(trait_name, samples, vals, cofactors)
+    pheno_file = write_phenotype_file(trait_name, samples, vals, dataset, cofactors, perm_strata_list)
     if dataset.group.genofile:
         geno_file = locate(dataset.group.genofile, "genotype")
     else:
@@ -46,6 +46,9 @@ def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, n
     if cofactors:
         post_data["addcovar"] = True
 
+    if perm_strata_list:
+        post_data["pstrata"] = True
+
     rqtl_output = requests.post(GN3_RQTL_URL, data=post_data).json()
     if num_perm > 0:
         return rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'], rqtl_output['results']
@@ -66,12 +69,12 @@ def write_phenotype_file(trait_name: str,
                          samples: List[str],
                          vals: List,
                          dataset_ob,
-                         cofactors: Optional[str] = None) -> TextIO:
+                         cofactors: Optional[str] = None,
+                         perm_strata_list: Optional[List] = None) -> TextIO:
     """Given trait name, sample list, value list, dataset ob, and optional string
     representing cofactors, return the file's full path/name
 
     """
-
     cofactor_data = cofactors_to_dict(cofactors, dataset_ob, samples)
 
     pheno_file = io.StringIO()
@@ -79,6 +82,8 @@ def write_phenotype_file(trait_name: str,
 
     header_row = ["Samples", trait_name]
     header_row += [cofactor for cofactor in cofactor_data]
+    if perm_strata_list:
+        header_row.append("Strata")
 
     writer.writerow(header_row)
     for i, sample in enumerate(samples):
@@ -89,6 +94,8 @@ def write_phenotype_file(trait_name: str,
             this_row.append("NA")
         for cofactor in cofactor_data:
             this_row.append(cofactor_data[cofactor][i])
+        if perm_strata_list:
+            this_row.append(perm_strata_list[i])
         writer.writerow(this_row)
 
     hash_of_file = get_hash_of_textio(pheno_file)
@@ -106,21 +113,21 @@ def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict:
     and list of samples, return cofactor data as a Dict
 
     """
-    cofactors = {}
+    cofactor_dict = {}
     if cofactors:
         dataset_ob.group.get_samplelist()
         sample_list = dataset_ob.group.samplelist
         for cofactor in cofactors.split(","):
             cofactor_name, cofactor_dataset = cofactor.split(":")
-            if cofactor_dataset == this_dataset.name:
-                cofactors[cofactor_name] = []
+            if cofactor_dataset == dataset_ob.name:
+                cofactor_dict[cofactor_name] = []
                 trait_ob = create_trait(dataset=dataset_ob,
                                         name=cofactor_name)
                 sample_data = trait_ob.data
                 for index, sample in enumerate(samples):
                     if sample in sample_data:
                         sample_value = sample_data[sample].value
-                        cofactors[cofactor_name].append(sample_value)
+                        cofactor_dict[cofactor_name].append(sample_value)
                     else:
-                        cofactors[cofactor_name].append("NA")
-    return cofactors
+                        cofactor_dict[cofactor_name].append("NA")
+    return cofactor_dict
-- 
cgit v1.2.3


From 6a54fa84542458fe8cfd288bf4c507214a1ddd04 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:08:14 +0000
Subject: Make all attribute names lowercase to deal with situations where
 sometimes they're lower-case and other times not

---
 wqflask/wqflask/marker_regression/run_mapping.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py
index d727e3ff..c5b980a7 100644
--- a/wqflask/wqflask/marker_regression/run_mapping.py
+++ b/wqflask/wqflask/marker_regression/run_mapping.py
@@ -765,9 +765,9 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples):
         if sample in list(sample_list.sample_attribute_values.keys()):
             combined_string = ""
             for var in categorical_vars:
-                if var in list(sample_list.sample_attribute_values[sample].keys()):
+                if var.lower() in sample_list.sample_attribute_values[sample]:
                     combined_string += str(
-                        sample_list.sample_attribute_values[sample][var])
+                        sample_list.sample_attribute_values[sample][var.lower()])
                 else:
                     combined_string += "NA"
         else:
-- 
cgit v1.2.3


From 2c2e1dc3d4be635dbf73aab380e22acca6a2014b Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:18:36 +0000
Subject: Improved test_rqtl_mapping.py with Bonface's recommendation to use
 dataclasses

---
 .../wqflask/marker_regression/test_rqtl_mapping.py | 23 ++++++++++------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 6853c021..bd97b2d2 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -2,17 +2,17 @@ import unittest
 from unittest import mock
 from wqflask.marker_regression.rqtl_mapping import run_rqtl
 
-class AttributeSetter:
-    def __init__(self, obj):
-        for key, val in obj.items():
-            setattr(self, key, val)
+@dataclass
+class MockGroup:
+    name: str,
+    genofile: str
 
-class MockGroup(AttributeSetter):
-    def get_samplelist(self):
-        return None
+@dataclass
+class MockDataset:
+    group: MockGroup
 
 class TestRqtlMapping(unittest.TestCase):
-
+    """Tests for functions in rqtl_mapping.py"""
     @mock.patch("wqflask.marker_regression.rqtl_mapping.process_rqtl_results")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post")
@@ -22,15 +22,12 @@ class TestRqtlMapping(unittest.TestCase):
         """Test for run_rqtl with permutations > 0"""
         dataset_group = MockGroup(
             {"name": "GP1", "genofile": "file_geno"})
-
-        dataset = AttributeSetter({"group": dataset_group})
+        dataset = MockDataset(dataset_group)
 
         mock_write_pheno_file.return_value = "pheno_filename"
         mock_locate.return_value = "geno_filename"
-
         mock_post.return_value = {"output_file": "output_filename",
                                   "rqtl_cmd": "the_command"}
-
         mock_process_perm.return_value = [[], 3, 4]
         mock_process_rqtl.return_value = []
 
@@ -44,4 +41,4 @@ class TestRqtlMapping(unittest.TestCase):
         mock_post.assert_called_once()
         mock_process_perm.assert_called_once()
         mock_process_rqtl.assert_called_once()
-        self.assertEqual(results, ([], 3, 4, []))
\ No newline at end of file
+        self.assertEqual(results, ([], 3, 4, []))
-- 
cgit v1.2.3


From 37941d2724aaf22c1aeca18a77e4c17248c5b7bc Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:27:26 +0000
Subject: Updated test_run_mapping.py to account for attribute keys being
 checked as lowercase, though not sure if this will fully fix the test

---
 .../unit/wqflask/marker_regression/test_run_mapping.py     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py
index 78cd3be9..c220a072 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py
@@ -229,20 +229,20 @@ class TestRunMapping(unittest.TestCase):
         used_samples = ["S1", "S2"]
         sample_list = AttributeSetter({"sample_attribute_values": {
             "S1": {
-                "C1": "c1_value",
-                "C2": "c2_value",
-                "W1": "w1_value"
+                "c1": "c1_value",
+                "c2": "c2_value",
+                "w1": "w1_value"
 
             },
             "S2": {
-                "W1": "w2_value",
-                "W2": "w2_value"
+                "w1": "w2_value",
+                "w2": "w2_value"
 
             },
             "S3": {
 
-                "C1": "c1_value",
-                "C2": "c2_value"
+                "c1": "c1_value",
+                "c2": "c2_value"
 
             },
 
-- 
cgit v1.2.3


From 09768ed9c255cdd10561eeba28bf3752747bf378 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:29:57 +0000
Subject: Removed parts of test_rqtl_mapping.py referring to
 process_perm_results and process_rqtl_results since that functionality was
 moved to GN3

---
 .../tests/unit/wqflask/marker_regression/test_rqtl_mapping.py    | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index bd97b2d2..00a05d2d 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -13,12 +13,10 @@ class MockDataset:
 
 class TestRqtlMapping(unittest.TestCase):
     """Tests for functions in rqtl_mapping.py"""
-    @mock.patch("wqflask.marker_regression.rqtl_mapping.process_rqtl_results")
-    @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.locate")
     @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file")
-    def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post, mock_process_perm, mock_process_rqtl):
+    def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post):
         """Test for run_rqtl with permutations > 0"""
         dataset_group = MockGroup(
             {"name": "GP1", "genofile": "file_geno"})
@@ -28,8 +26,6 @@ class TestRqtlMapping(unittest.TestCase):
         mock_locate.return_value = "geno_filename"
         mock_post.return_value = {"output_file": "output_filename",
                                   "rqtl_cmd": "the_command"}
-        mock_process_perm.return_value = [[], 3, 4]
-        mock_process_rqtl.return_value = []
 
         results = run_rqtl(trait_name="the_trait", vals=[], samples=[],
         dataset=dataset, mapping_scale="cM", model="normal", method="hk",
@@ -39,6 +35,5 @@ class TestRqtlMapping(unittest.TestCase):
         mock_write_pheno_file.assert_called_once()
         mock_locate.assert_called_once()
         mock_post.assert_called_once()
-        mock_process_perm.assert_called_once()
-        mock_process_rqtl.assert_called_once()
+
         self.assertEqual(results, ([], 3, 4, []))
-- 
cgit v1.2.3


From 0cc4376462b7f136e59d99dba1975d6023326126 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:32:58 +0000
Subject: Fixed syntax mistake when creating dataclass in test_rqtl_mapping.py

---
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 00a05d2d..31a2c9e4 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -4,7 +4,7 @@ from wqflask.marker_regression.rqtl_mapping import run_rqtl
 
 @dataclass
 class MockGroup:
-    name: str,
+    name: str
     genofile: str
 
 @dataclass
-- 
cgit v1.2.3


From 97df46292e8322f01466d36b1e1e3ecdad64e5c0 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:50:03 +0000
Subject: Forgot to import dataclass

---
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 31a2c9e4..ff451230 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -1,5 +1,7 @@
 import unittest
 from unittest import mock
+from dataclasses import dataclass
+
 from wqflask.marker_regression.rqtl_mapping import run_rqtl
 
 @dataclass
-- 
cgit v1.2.3


From 767ff7e97a751f164da610b2f9c536b6660ec420 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 20:53:22 +0000
Subject: Fixed the way MockGroup was initialized

---
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index ff451230..a7f708e1 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -20,8 +20,7 @@ class TestRqtlMapping(unittest.TestCase):
     @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file")
     def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post):
         """Test for run_rqtl with permutations > 0"""
-        dataset_group = MockGroup(
-            {"name": "GP1", "genofile": "file_geno"})
+        dataset_group = MockGroup("GP1", "file_geno")
         dataset = MockDataset(dataset_group)
 
         mock_write_pheno_file.return_value = "pheno_filename"
-- 
cgit v1.2.3


From f69eb61c5f58b008233f7bee0e551fe9151c7724 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 21:09:18 +0000
Subject: Change test_rqtl_mapping.py to account for full results being
 returned from the GN3 request

---
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index a7f708e1..9a5162ae 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -25,8 +25,11 @@ class TestRqtlMapping(unittest.TestCase):
 
         mock_write_pheno_file.return_value = "pheno_filename"
         mock_locate.return_value = "geno_filename"
-        mock_post.return_value = {"output_file": "output_filename",
-                                  "rqtl_cmd": "the_command"}
+        mock_post.return_value = Mock(ok=True)
+        mock_post.return_value.json.return_value = {"perm_results": [],
+                                                    "suggestive": 3,
+                                                    "significant": 4,
+                                                    "results" : []}
 
         results = run_rqtl(trait_name="the_trait", vals=[], samples=[],
         dataset=dataset, mapping_scale="cM", model="normal", method="hk",
-- 
cgit v1.2.3


From e78c84aa10849465e0272daa6d94bfbd3419b072 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 27 May 2021 21:11:16 +0000
Subject: Fix the way Mock is initialized

---
 wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
index 9a5162ae..9d13e943 100644
--- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
+++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py
@@ -25,7 +25,7 @@ class TestRqtlMapping(unittest.TestCase):
 
         mock_write_pheno_file.return_value = "pheno_filename"
         mock_locate.return_value = "geno_filename"
-        mock_post.return_value = Mock(ok=True)
+        mock_post.return_value = mock.Mock(ok=True)
         mock_post.return_value.json.return_value = {"perm_results": [],
                                                     "suggestive": 3,
                                                     "significant": 4,
-- 
cgit v1.2.3