From c2fac869f5ac1a398677d5646d34a1a0704e29e4 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 20 May 2021 21:44:12 +0000 Subject: Re-implemented R/qtl to get the results from the GN3 API --- wqflask/wqflask/marker_regression/rqtl_mapping.py | 569 +++++----------------- wqflask/wqflask/marker_regression/run_mapping.py | 6 +- 2 files changed, 135 insertions(+), 440 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 1c8477bf..3f4899b0 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -1,460 +1,155 @@ -import rpy2.robjects as ro -import rpy2.robjects.numpy2ri as np2r -import numpy as np -import json - -from flask import g +import csv +import hashlib +import io +import requests +import shutil +from typing import Dict +from typing import List +from typing import Optional +from typing import TextIO from base.webqtlConfig import TMPDIR from base.trait import create_trait -from base.data_set import create_dataset -from utility import webqtlUtil -from utility.tools import locate, TEMPDIR -from wqflask.marker_regression.gemma_mapping import generate_random_n_string -from flask import g +from utility.tools import locate import utility.logger logger = utility.logger.getLogger(__name__) -# Get a trait's type (numeric, categorical, etc) from the DB - - -def get_trait_data_type(trait_db_string): - logger.info("get_trait_data_type") - the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" - logger.info("the_query done") - results_json = g.db.execute(the_query).fetchone() - logger.info("the_query executed") - results_ob = json.loads(results_json[0]) - logger.info("json results loaded") - if trait_db_string in results_ob: - logger.info("found") - return results_ob[trait_db_string] - else: - logger.info("not found") - return "numeric" - +GN3_RQTL_URL = "http://localhost:8086/api/rqtl/compute" +GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp" -# Run qtl mapping using R/qtl -def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): - logger.info("Start run_rqtl_geno") - # Get pointers to some common R functions - r_library = ro.r["library"] # Map the library function - r_c = ro.r["c"] # Map the c function - plot = ro.r["plot"] # Map the plot function - png = ro.r["png"] # Map the png function - dev_off = ro.r["dev.off"] # Map the device off function +def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): + """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)""" - print((r_library("qtl"))) # Load R/qtl - - logger.info("QTL library loaded") - - # Get pointers to some R/qtl functions - scanone = ro.r["scanone"] # Map the scanone function - scantwo = ro.r["scantwo"] # Map the scantwo function - # Map the calc.genoprob function - calc_genoprob = ro.r["calc.genoprob"] - - crossname = dataset.group.name - # try: - # generate_cross_from_rdata(dataset) - # read_cross_from_rdata = ro.r["generate_cross_from_rdata"] # Map the local read_cross_from_rdata function - # genofilelocation = locate(crossname + ".RData", "genotype/rdata") - # cross_object = read_cross_from_rdata(genofilelocation) # Map the local GENOtoCSVR function - # except: - - if mapping_scale == "morgan": - scale_units = "cM" - else: - scale_units = "Mb" - - generate_cross_from_geno(dataset, scale_units) - # Map the local GENOtoCSVR function - GENOtoCSVR = ro.r["GENOtoCSVR"] - crossfilelocation = TMPDIR + crossname + ".cross" + pheno_file = write_phenotype_file(trait_name, samples, vals, cofactors) if dataset.group.genofile: - genofilelocation = locate(dataset.group.genofile, "genotype") - else: - genofilelocation = locate(dataset.group.name + ".geno", "genotype") - logger.info("Going to create a cross from geno") - # TODO: Add the SEX if that is available - cross_object = GENOtoCSVR(genofilelocation, crossfilelocation) - logger.info("before calc_genoprob") - if manhattan_plot: - cross_object = calc_genoprob(cross_object) + geno_file = locate(dataset.group.genofile, "genotype") else: - cross_object = calc_genoprob(cross_object, step=5, stepwidth="max") - logger.info("after calc_genoprob") - - pheno_string = sanitize_rqtl_phenotype(vals) - logger.info("phenostring done") - names_string = sanitize_rqtl_names(samples) - logger.info("sanitized pheno and names") - # Add the phenotype - cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") - # Add the phenotype - cross_object = add_names(cross_object, names_string, "the_names") - logger.info("Added pheno and names") - # Create the additive covariate markers - marker_covars = create_marker_covariates(control_marker, cross_object) - logger.info("Marker covars done") - if cofactors != "": - logger.info("Cofactors: " + cofactors) - # Create the covariates from selected traits - cross_object, trait_covars = add_cofactors( - cross_object, dataset, cofactors, samples) - ro.r('all_covars <- cbind(marker_covars, trait_covars)') - else: - ro.r('all_covars <- marker_covars') - covars = ro.r['all_covars'] - # DEBUG to save the session object to file - if pair_scan: - if do_control == "true": - logger.info("Using covariate") - result_data_frame = scantwo( - cross_object, pheno="the_pheno", addcovar=covars, model=model, method=method, n_cluster=16) - else: - logger.info("No covariates") - result_data_frame = scantwo( - cross_object, pheno="the_pheno", model=model, method=method, n_cluster=16) - - pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=TEMPDIR + pair_scan_filename) - plot(result_data_frame) - dev_off() - - return process_pair_scan_results(result_data_frame) - else: - if do_control == "true" or cofactors != "": - logger.info("Using covariate") - ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', addcovar=all_covars, model='{model}', method='{method}')") - result_data_frame = ro.r("qtl_results") - else: - ro.r(f"qtl_results = scanone(the_cross, pheno='the_pheno', model='{model}', method='{method}')") - result_data_frame = np.asarray(ro.r("qtl_results")).T + geno_file = locate(dataset.group.name + ".geno", "genotype") - marker_names = np.asarray(ro.r("row.names(qtl_results)")) + post_data = { + "pheno_file": pheno_file, + "geno_file": geno_file, + "model": model, + "method": method, + "nperm": num_perm, + "scale": mapping_scale + } - # Do permutation (if requested by user) - if num_perm > 0 and permCheck == "ON": - # ZS: The strata list would only be populated if "Stratified" was checked on before mapping - if len(perm_strata_list) > 0: - cross_object, strata_ob = add_perm_strata( - cross_object, perm_strata_list) - - if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int( - num_perm), perm_strata=strata_ob, model=model, method=method) - else: - perm_data_frame = scanone( - cross_object, pheno_col="the_pheno", n_perm=num_perm, perm_strata=strata_ob, model=model, method=method) - else: - if do_control == "true" or cofactors != "": - perm_data_frame = scanone(cross_object, pheno_col="the_pheno", addcovar=covars, n_perm=int( - num_perm), model=model, method=method) - else: - perm_data_frame = scanone( - cross_object, pheno_col="the_pheno", n_perm=num_perm, model=model, method=method) - - # Functions that sets the thresholds for the webinterface - perm_output, suggestive, significant = process_rqtl_perm_results( - num_perm, perm_data_frame) - return perm_output, suggestive, significant, process_rqtl_results(marker_names, result_data_frame, dataset.group.species) - else: - return process_rqtl_results(marker_names, result_data_frame, dataset.group.species) - - -def generate_cross_from_rdata(dataset): - rdata_location = locate(dataset.group.name + ".RData", "genotype/rdata") - ro.r(""" - generate_cross_from_rdata <- function(filename = '%s') { - load(file=filename) - cross = cunique - return(cross) - } - """ % (rdata_location)) - - -# TODO: Need to figure out why some genofiles have the wrong format and don't convert properly -def generate_cross_from_geno(dataset, scale_units): - - cross_filename = (f"{str(dataset.group.name)}_" - f"{generate_random_n_string(6)}") - - ro.r(""" - trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) } - getGenoCode <- function(header, name = 'unk'){ - mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1) - return(trim(strsplit(header[mat],':')[[1]][2])) - } - GENOtoCSVR <- function(genotypes = '%s', out = '%s.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){ - header = readLines(genotypes, 40) # Assume a geno header is not longer than 40 lines - toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1 # Major hack to skip the geno headers - type <- getGenoCode(header, 'type') - if(type == '4-way'){ - genocodes <- NULL - } else { - genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat')) # Get the genotype codes - } - genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#') - cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n') - if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4)) # If there isn't a phenotype, generate a random one - if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4)) # If there isn't a sex phenotype, treat all as males - outCSVR <- rbind(c('Pheno', '', '', phenotype), # Phenotype - c('sex', '', '', sex), # Sex phenotype for the mice - cbind(genodata[,c('Locus','Chr', '%s')], genodata[, 5:ncol(genodata)])) # Genotypes - write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',') # Save it to a file - require(qtl) - if(type == '4-way'){ - cat('Loading in as 4-WAY\n') - cross = read.cross(file=out, 'csvr', genotypes=NULL, crosstype="4way") # Load the created cross file using R/qtl read.cross - }else if(type == 'f2'){ - cat('Loading in as F2\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes, crosstype="f2") # Load the created cross file using R/qtl read.cross - }else{ - cat('Loading in as normal\n') - cross = read.cross(file=out, 'csvr', genotypes=genocodes) # Load the created cross file using R/qtl read.cross - } - if(type == 'riset'){ - cat('Converting to RISELF\n') - cross <- convert2riself(cross) # If its a RIL, convert to a RIL in R/qtl - } - return(cross) - } - """ % (dataset.group.genofile, cross_filename, scale_units)) + if do_control == "true" and control_marker: + post_data["control_marker"] = control_marker + if not manhattan_plot: + post_data["interval"] = True + if cofactors: + post_data["addcovar"] = True -def add_perm_strata(cross, perm_strata): - col_string = 'c("the_strata")' - perm_strata_string = "c(" - for item in perm_strata: - perm_strata_string += str(item) + "," + out_file = requests.post(GN3_RQTL_URL, data=post_data).json()['output_file'] - perm_strata_string = perm_strata_string[:-1] + ")" + return process_rqtl_results(out_file) - cross = add_phenotype(cross, perm_strata_string, "the_strata") - strata_ob = pull_var("perm_strata", cross, col_string) +def process_rqtl_results(out_file: str) -> List: + """Given the output filename, read in results and + return as a list of dictionaries representing each + marker - return cross, strata_ob + """ - -def sanitize_rqtl_phenotype(vals): - pheno_as_string = "c(" - for i, val in enumerate(vals): - if val == "x": - if i < (len(vals) - 1): - pheno_as_string += "NA," + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(GN3_TMP_PATH + "/output/" + out_file, "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + continue else: - pheno_as_string += "NA" + # Convert chr to int if possible + try: + the_chr = int(line_items[1][1:-1]) + except: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + +def get_hash_of_textio(the_file: TextIO) -> str: + """Given a StringIO, return the hash of its contents""" + + the_file.seek(0) + hash_of_file = hashlib.md5(the_file.read().encode()).hexdigest() + + return hash_of_file + + +def write_phenotype_file(trait_name: str, + samples: List[str], + vals: List, + dataset_ob, + cofactors: Optional[str] = None) -> TextIO: + """Given trait name, sample list, value list, dataset ob, and optional string + representing cofactors, return the file's full path/name + + """ + + cofactor_data = cofactors_to_dict(cofactors, dataset_ob, samples) + + pheno_file = io.StringIO() + writer = csv.writer(pheno_file, delimiter="\t", quoting=csv.QUOTE_NONE) + + header_row = ["Samples", trait_name] + header_row += [cofactor for cofactor in cofactor_data] + + writer.writerow(header_row) + for i, sample in enumerate(samples): + this_row = [sample] + if vals[i] != "x": + this_row.append(vals[i]) else: - if i < (len(vals) - 1): - pheno_as_string += str(val) + "," - else: - pheno_as_string += str(val) - pheno_as_string += ")" - - return pheno_as_string - - -def sanitize_rqtl_names(vals): - pheno_as_string = "c(" - for i, val in enumerate(vals): - if val == "x": - if i < (len(vals) - 1): - pheno_as_string += "NA," - else: - pheno_as_string += "NA" - else: - if i < (len(vals) - 1): - pheno_as_string += "'" + str(val) + "'," - else: - pheno_as_string += "'" + str(val) + "'" - pheno_as_string += ")" - - return pheno_as_string - - -def add_phenotype(cross, pheno_as_string, col_name): - ro.globalenv["the_cross"] = cross - ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + col_name + \ - ' = as.numeric(' + pheno_as_string + '))') - return ro.r["the_cross"] - - -def add_categorical_covar(cross, covar_as_string, i): - ro.globalenv["the_cross"] = cross - logger.info("cross set") - ro.r('covar <- as.factor(' + covar_as_string + ')') - logger.info("covar set") - ro.r('newcovar <- model.matrix(~covar-1)') - logger.info("model.matrix finished") - ro.r('cat("new covar columns", ncol(newcovar), "\n")') - nCol = ro.r('ncol(newcovar)') - logger.info("ncol covar done: " + str(nCol[0])) - ro.r('pheno <- data.frame(pull.pheno(the_cross))') - logger.info("pheno pulled from cross") - nCol = int(nCol[0]) - logger.info("nCol python int:" + str(nCol)) - col_names = [] - # logger.info("loop") - for x in range(1, (nCol + 1)): - #logger.info("loop" + str(x)); - col_name = "covar_" + str(i) + "_" + str(x) - #logger.info("col_name" + col_name); - ro.r('the_cross$pheno <- cbind(pheno, ' + \ - col_name + ' = newcovar[,' + str(x) + '])') - col_names.append(col_name) - #logger.info("loop" + str(x) + "done"); - - logger.info("returning from add_categorical_covar") - return ro.r["the_cross"], col_names - - -def add_names(cross, names_as_string, col_name): - ro.globalenv["the_cross"] = cross - ro.r('pheno <- data.frame(pull.pheno(the_cross))') - ro.r('the_cross$pheno <- cbind(pheno, ' + \ - col_name + ' = ' + names_as_string + ')') - return ro.r["the_cross"] - - -def pull_var(var_name, cross, var_string): - ro.globalenv["the_cross"] = cross - ro.r(var_name + ' <- pull.pheno(the_cross, ' + var_string + ')') - - return ro.r[var_name] - - -def add_cofactors(cross, this_dataset, covariates, samples): - ro.numpy2ri.activate() - - covariate_list = covariates.split(",") - covar_name_string = "c(" - for i, covariate in enumerate(covariate_list): - logger.info("Covariate: " + covariate) - this_covar_data = [] - covar_as_string = "c(" - trait_name = covariate.split(":")[0] - dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = create_trait(dataset=dataset_ob, - name=trait_name, - cellid=None) - - this_dataset.group.get_samplelist() - trait_samples = this_dataset.group.samplelist - trait_sample_data = trait_ob.data - for index, sample in enumerate(samples): - if sample in trait_samples: - if sample in trait_sample_data: - sample_value = trait_sample_data[sample].value - this_covar_data.append(sample_value) - else: - this_covar_data.append("NA") - - for j, item in enumerate(this_covar_data): - if j < (len(this_covar_data) - 1): - covar_as_string += str(item) + "," - else: - covar_as_string += str(item) - - covar_as_string += ")" - - datatype = get_trait_data_type(covariate) - logger.info("Covariate: " + covariate + " is of type: " + datatype) - if(datatype == "categorical"): # Cat variable - logger.info("call of add_categorical_covar") - cross, col_names = add_categorical_covar( - cross, covar_as_string, i) # Expand and add it to the cross - logger.info("add_categorical_covar returned") - # Go through the additional covar names - for z, col_name in enumerate(col_names): - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - if(z < (len(col_names) - 1)): - covar_name_string += '"' + col_name + '", ' + this_row.append("NA") + for cofactor in cofactor_data: + this_row.append(cofactor_data[cofactor][i]) + writer.writerow(this_row) + + hash_of_file = get_hash_of_textio(pheno_file) + file_path = TMPDIR + hash_of_file + ".csv" + + with open(file_path, "w") as fd: + pheno_file.seek(0) + shutil.copyfileobj(pheno_file, fd) + + return file_path + + +def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict: + """Given a string of cofactors, the trait being mapped's dataset ob, + and list of samples, return cofactor data as a Dict + + """ + cofactors = {} + if cofactors: + dataset_ob.group.get_samplelist() + sample_list = dataset_ob.group.samplelist + for cofactor in cofactors.split(","): + cofactor_name, cofactor_dataset = cofactor.split(":") + if cofactor_dataset == this_dataset.name: + cofactors[cofactor_name] = [] + trait_ob = create_trait(dataset=dataset_ob, + name=cofactor_name) + sample_data = trait_ob.data + for index, sample in enumerate(samples): + #if (sample in sample_list) and (sample in sample_data): + if sample in sample_data: + sample_value = sample_data[sample].value + cofactors[cofactor_name].append(sample_value) else: - covar_name_string += '"' + col_name + '"' - else: - col_name = "covar_" + str(i) - cross = add_phenotype(cross, covar_as_string, col_name) - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - covar_name_string += '"' + col_name + '"' - - covar_name_string += ")" - covars_ob = pull_var("trait_covars", cross, covar_name_string) - return cross, covars_ob - - -def create_marker_covariates(control_marker, cross): - ro.globalenv["the_cross"] = cross - # Get the genotype matrix - ro.r('genotypes <- pull.geno(the_cross)') - # TODO: sanitize user input, Never Ever trust a user - userinput_sanitized = control_marker.replace(" ", "").split(",") - logger.debug(userinput_sanitized) - if len(userinput_sanitized) > 0: - covariate_names = ', '.join('"{0}"'.format(w) - for w in userinput_sanitized) - ro.r('covnames <- c(' + covariate_names + ')') - else: - ro.r('covnames <- c()') - ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') - ro.r('covnames <- covnames[covInGeno]') - ro.r("cat('covnames (purged): ', covnames,'\n')") - # Get the covariate matrix by using the marker name as index to the genotype file - ro.r('marker_covars <- genotypes[,covnames]') - # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc - return ro.r["marker_covars"] - - -def process_pair_scan_results(result): - pair_scan_results = [] - - result = result[1] - output = [tuple([result[j][i] for j in range(result.ncol)]) - for i in range(result.nrow)] - - for i, line in enumerate(result.iter_row()): - marker = {} - marker['name'] = result.rownames[i] - marker['chr1'] = output[i][0] - marker['Mb'] = output[i][1] - marker['chr2'] = int(output[i][2]) - pair_scan_results.append(marker) - - return pair_scan_results - - -def process_rqtl_perm_results(num_perm, results): - perm_vals = [item[0] for item in results] - - perm_output = perm_vals - suggestive = np.percentile(np.array(perm_vals), 67) - significant = np.percentile(np.array(perm_vals), 95) - - return perm_output, suggestive, significant - - -def process_rqtl_results(marker_names, results, species_name): # TODO: how to make this a one liner and not copy the stuff in a loop - qtl_results = [] - - for i, line in enumerate(results): - marker = {} - marker['name'] = marker_names[i] - if species_name == "mouse" and line[0] == 20: - marker['chr'] = "X" - else: - try: - marker['chr'] = int(line[0]) - except: - marker['chr'] = line[0] - marker['cM'] = marker['Mb'] = line[1] - marker['lod_score'] = line[2] - qtl_results.append(marker) - - return qtl_results + cofactors[cofactor_name].append("NA") + return cofactors \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index a3b579ec..be1186c0 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -242,10 +242,10 @@ class RunMapping: # if start_vars['pair_scan'] == "true": # self.pair_scan = True if self.permCheck and self.num_perm > 0: - self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno( - self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl( + self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) else: - results = rqtl_mapping.run_rqtl_geno(self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, + results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) elif self.mapping_method == "reaper": if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON -- cgit v1.2.3 From efbe47b58f6e0b9a4b509a28ec8788a1c6dae343 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 21 May 2021 19:43:23 +0000 Subject: run_rqtl now reads in permutation output --- wqflask/wqflask/marker_regression/rqtl_mapping.py | 34 +++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 3f4899b0..c3f047fc 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -8,6 +8,8 @@ from typing import List from typing import Optional from typing import TextIO +import numpy as np + from base.webqtlConfig import TMPDIR from base.trait import create_trait from utility.tools import locate @@ -46,7 +48,11 @@ def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, method, model, p out_file = requests.post(GN3_RQTL_URL, data=post_data).json()['output_file'] - return process_rqtl_results(out_file) + if num_perm > 0: + perm_results, suggestive, significant = process_perm_results(out_file) + return perm_results, suggestive, significant, process_rqtl_results(out_file) + else: + return process_rqtl_results(out_file) def process_rqtl_results(out_file: str) -> List: @@ -63,6 +69,7 @@ def process_rqtl_results(out_file: str) -> List: for line in the_file: line_items = line.split(",") if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line continue else: # Convert chr to int if possible @@ -81,6 +88,30 @@ def process_rqtl_results(out_file: str) -> List: return marker_obs + +def process_perm_results(out_file: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(GN3_TMP_PATH + "/output/PERM_" + out_file, "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + else: + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + logger.debug("PERM RESULTS:", perm_results) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant + + def get_hash_of_textio(the_file: TextIO) -> str: """Given a StringIO, return the hash of its contents""" @@ -146,7 +177,6 @@ def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict: name=cofactor_name) sample_data = trait_ob.data for index, sample in enumerate(samples): - #if (sample in sample_list) and (sample in sample_data): if sample in sample_data: sample_value = sample_data[sample].value cofactors[cofactor_name].append(sample_value) -- cgit v1.2.3 From d2edff1a05a81cd2ca703de940ea2bec5bd1dd10 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 21 May 2021 20:32:15 +0000 Subject: Removed a couple unused parameters for run_rqtl --- wqflask/wqflask/marker_regression/rqtl_mapping.py | 2 +- wqflask/wqflask/marker_regression/run_mapping.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index c3f047fc..d18f6a7b 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -20,7 +20,7 @@ logger = utility.logger.getLogger(__name__) GN3_RQTL_URL = "http://localhost:8086/api/rqtl/compute" GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp" -def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, method, model, permCheck, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, pair_scan, cofactors): +def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors): """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)""" pheno_file = write_phenotype_file(trait_name, samples, vals, cofactors) diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index be1186c0..d727e3ff 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -243,10 +243,10 @@ class RunMapping: # self.pair_scan = True if self.permCheck and self.num_perm > 0: self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl( - self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.model, self.method, self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates) else: - results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.method, self.model, self.permCheck, - self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.pair_scan, self.covariates) + results = rqtl_mapping.run_rqtl(self.this_trait.name, self.vals, self.samples, self.dataset, self.mapping_scale, self.model, self.method, + self.num_perm, perm_strata, self.do_control, self.control_marker, self.manhattan_plot, self.covariates) elif self.mapping_method == "reaper": if "startMb" in start_vars: # ZS: Check if first time page loaded, so it can default to ON if "additiveCheck" in start_vars: -- cgit v1.2.3 From 579ec56e488db19e7cd489b906ae6da7cd453239 Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 22 May 2021 22:31:51 +0000 Subject: Rewrote test_rqtl_mapping.py, though haven't done tests for all functions yet --- .../wqflask/marker_regression/test_rqtl_mapping.py | 76 ++++++++++++---------- 1 file changed, 40 insertions(+), 36 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 91d2c587..5c679c05 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -1,42 +1,46 @@ import unittest from unittest import mock -from wqflask import app -from wqflask.marker_regression.rqtl_mapping import get_trait_data_type -from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_phenotype -from wqflask.marker_regression.rqtl_mapping import sanitize_rqtl_names +from wqflask.marker_regression.rqtl_mapping import run_rqtl +class AttributeSetter: + def __init__(self, obj): + for key, val in obj.items(): + setattr(self, key, val) + +class MockGroup(AttributeSetter): + def get_samplelist(self): + return None class TestRqtlMapping(unittest.TestCase): - def setUp(self): - self.app_context = app.app_context() - self.app_context.push() - - def tearDown(self): - self.app_context.pop() - - @mock.patch("wqflask.marker_regression.rqtl_mapping.g") - @mock.patch("wqflask.marker_regression.rqtl_mapping.logger") - def test_get_trait_data(self, mock_logger, mock_db): - """test for getting trait data_type return True""" - query_value = """SELECT value FROM TraitMetadata WHERE type='trait_data_type'""" - mock_db.db.execute.return_value.fetchone.return_value = [ - """{"type":"trait_data_type","name":"T1","traid_id":"fer434f"}"""] - results = get_trait_data_type("traid_id") - mock_db.db.execute.assert_called_with(query_value) - self.assertEqual(results, "fer434f") - - def test_sanitize_rqtl_phenotype(self): - """test for sanitizing rqtl phenotype""" - vals = ['f', "x", "r", "x", "x"] - results = sanitize_rqtl_phenotype(vals) - expected_phenotype_string = 'c(f,NA,r,NA,NA)' - - self.assertEqual(results, expected_phenotype_string) - - def test_sanitize_rqtl_names(self): - """test for sanitzing rqtl names""" - vals = ['f', "x", "r", "x", "x"] - expected_sanitized_name = "c('f',NA,'r',NA,NA)" - results = sanitize_rqtl_names(vals) - self.assertEqual(expected_sanitized_name, results) + @mock.patch("wqflask.marker_regression.rqtl_mapping.process_rqtl_results") + @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results") + @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post") + @mock.patch("wqflask.marker_regression.rqtl_mapping.locate") + @mock.patch("wqflask.marker_regression.gemma_mapping.write_phenotype_file") + def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post, mock_process_perm, mock_process_rqtl): + """Test for run_rqtl with permutations > 0""" + dataset_group = MockGroup( + {"name": "GP1", "genofile": "file_geno"}) + + dataset = AttributeSetter({"group": dataset_group}) + + mock_write_pheno_file.return_value = "pheno_filename" + mock_locate.return_value = "geno_filename" + + mock_post.return_value = "output_filename" + + mock_process_perm.return_value = [[], 3, 4] + mock_process_rqtl.return_value = [] + + results = run_rqtl(trait_name="the_trait", vals=[], samples=[], + dataset=dataset, mapping_scale="cM", model="normal", method="hk", + num_perm=5, perm_strata_list=[], do_control="false", control_marker="", + manhattan_plot=True, cofactors="") + + mock_write_pheno_file.assert_called_once() + mock_locate.assert_called_once() + mock_post.assert_called_once() + mock_process_perm.assert_called_once() + mock_process_rqtl.assert_called_once() + self.assertEqual(results, ([], 3, 4, [])) \ No newline at end of file -- cgit v1.2.3 From 1ec1219eae75f1cf9a1b20ea479ea6e2934eaa68 Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 22 May 2021 22:43:58 +0000 Subject: Fixed a couple broken tests --- wqflask/tests/unit/wqflask/api/test_mapping.py | 2 +- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/api/test_mapping.py b/wqflask/tests/unit/wqflask/api/test_mapping.py index b094294a..159c982b 100644 --- a/wqflask/tests/unit/wqflask/api/test_mapping.py +++ b/wqflask/tests/unit/wqflask/api/test_mapping.py @@ -58,7 +58,7 @@ class TestMapping(unittest.TestCase): self.assertEqual(results_2, expected_results) - @mock.patch("wqflask.api.mapping.rqtl_mapping.run_rqtl_geno") + @mock.patch("wqflask.api.mapping.rqtl_mapping.run_rqtl") @mock.patch("wqflask.api.mapping.gemma_mapping.run_gemma") @mock.patch("wqflask.api.mapping.initialize_parameters") @mock.patch("wqflask.api.mapping.retrieve_sample_data") diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 5c679c05..626869b8 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -17,7 +17,7 @@ class TestRqtlMapping(unittest.TestCase): @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results") @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post") @mock.patch("wqflask.marker_regression.rqtl_mapping.locate") - @mock.patch("wqflask.marker_regression.gemma_mapping.write_phenotype_file") + @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file") def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post, mock_process_perm, mock_process_rqtl): """Test for run_rqtl with permutations > 0""" dataset_group = MockGroup( -- cgit v1.2.3 From cf9f0bef4454f97270bcacd16080f9d7f6bbb5e6 Mon Sep 17 00:00:00 2001 From: zsloan Date: Sat, 22 May 2021 22:51:19 +0000 Subject: Fixed the mocked return_value for requests.post to fix broken test --- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 626869b8..6853c021 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -28,7 +28,8 @@ class TestRqtlMapping(unittest.TestCase): mock_write_pheno_file.return_value = "pheno_filename" mock_locate.return_value = "geno_filename" - mock_post.return_value = "output_filename" + mock_post.return_value = {"output_file": "output_filename", + "rqtl_cmd": "the_command"} mock_process_perm.return_value = [[], 3, 4] mock_process_rqtl.return_value = [] -- cgit v1.2.3 From 4d15c32e7d86885cffce1720db0df394bd6a02ff Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:19:37 +0000 Subject: Removed code that processes output files, since this functionality is moved to GN3 --- wqflask/wqflask/marker_regression/rqtl_mapping.py | 67 ++--------------------- 1 file changed, 4 insertions(+), 63 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index d18f6a7b..495678e9 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -46,70 +46,11 @@ def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, n if cofactors: post_data["addcovar"] = True - out_file = requests.post(GN3_RQTL_URL, data=post_data).json()['output_file'] - + rqtl_output = requests.post(GN3_RQTL_URL, data=post_data).json() if num_perm > 0: - perm_results, suggestive, significant = process_perm_results(out_file) - return perm_results, suggestive, significant, process_rqtl_results(out_file) + return rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'], rqtl_output['results'] else: - return process_rqtl_results(out_file) - - -def process_rqtl_results(out_file: str) -> List: - """Given the output filename, read in results and - return as a list of dictionaries representing each - marker - - """ - - marker_obs = [] - # Later I should probably redo this using csv.read to avoid the - # awkwardness with removing quotes with [1:-1] - with open(GN3_TMP_PATH + "/output/" + out_file, "r") as the_file: - for line in the_file: - line_items = line.split(",") - if line_items[1][1:-1] == "chr" or not line_items: - # Skip header line - continue - else: - # Convert chr to int if possible - try: - the_chr = int(line_items[1][1:-1]) - except: - the_chr = line_items[1][1:-1] - this_marker = { - "name": line_items[0][1:-1], - "chr": the_chr, - "cM": float(line_items[2]), - "Mb": float(line_items[2]), - "lod_score": float(line_items[3]) - } - marker_obs.append(this_marker) - - return marker_obs - - -def process_perm_results(out_file: str): - """Given base filename, read in R/qtl permutation output and calculate - suggestive and significant thresholds - - """ - perm_results = [] - with open(GN3_TMP_PATH + "/output/PERM_" + out_file, "r") as the_file: - for i, line in enumerate(the_file): - if i == 0: - # Skip header line - continue - else: - line_items = line.split(",") - perm_results.append(float(line_items[1])) - - logger.debug("PERM RESULTS:", perm_results) - - suggestive = np.percentile(np.array(perm_results), 67) - significant = np.percentile(np.array(perm_results), 95) - - return perm_results, suggestive, significant + return rqtl_output['results'] def get_hash_of_textio(the_file: TextIO) -> str: @@ -182,4 +123,4 @@ def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict: cofactors[cofactor_name].append(sample_value) else: cofactors[cofactor_name].append("NA") - return cofactors \ No newline at end of file + return cofactors -- cgit v1.2.3 From 666494b49bef48a46145cf415262d532530aba46 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 25 May 2021 20:49:48 +0000 Subject: Fixed/shortened get_categorical_variables function and correctly pass them to show_trait template --- wqflask/wqflask/show_trait/show_trait.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 837c7a54..a02da872 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -279,6 +279,8 @@ class ShowTrait: hddn['suggestive'] = 0 hddn['num_perm'] = 0 hddn['categorical_vars'] = "" + if categorical_var_list: + hddn['categorical_vars'] = ",".join(categorical_var_list) hddn['manhattan_plot'] = "" hddn['control_marker'] = "" if not self.temp_trait: @@ -684,23 +686,13 @@ def get_ncbi_summary(this_trait): return None -def get_categorical_variables(this_trait, sample_list): +def get_categorical_variables(this_trait, sample_list) -> list: categorical_var_list = [] if len(sample_list.attributes) > 0: for attribute in sample_list.attributes: - attribute_vals = [] - for sample_name in list(this_trait.data.keys()): - if sample_list.attributes[attribute].name in this_trait.data[sample_name].extra_attributes: - attribute_vals.append( - this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name]) - else: - attribute_vals.append("N/A") - num_distinct = len(set(attribute_vals)) - - if num_distinct < 10: - categorical_var_list.append( - sample_list.attributes[attribute].name) + if len(sample_list.attributes[attribute].distinct_values) < 10: + categorical_var_list.append(sample_list.attributes[attribute].name) return categorical_var_list -- cgit v1.2.3 From 92f84ab7c544ac9d7ffdbf302cf35b8b67ef7819 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 19:59:24 +0000 Subject: Add code writing perm_strata to phenotype input file in rqtl_mapping.py + fix issue in cofactors_to_dict where two variables were both called 'cofactors' --- wqflask/wqflask/marker_regression/rqtl_mapping.py | 25 +++++++++++++++-------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index 495678e9..09afb8d1 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -23,7 +23,7 @@ GN3_TMP_PATH = "/export/local/home/zas1024/genenetwork3/tmp" def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, num_perm, perm_strata_list, do_control, control_marker, manhattan_plot, cofactors): """Run R/qtl by making a request to the GN3 endpoint and reading in the output file(s)""" - pheno_file = write_phenotype_file(trait_name, samples, vals, cofactors) + pheno_file = write_phenotype_file(trait_name, samples, vals, dataset, cofactors, perm_strata_list) if dataset.group.genofile: geno_file = locate(dataset.group.genofile, "genotype") else: @@ -46,6 +46,9 @@ def run_rqtl(trait_name, vals, samples, dataset, mapping_scale, model, method, n if cofactors: post_data["addcovar"] = True + if perm_strata_list: + post_data["pstrata"] = True + rqtl_output = requests.post(GN3_RQTL_URL, data=post_data).json() if num_perm > 0: return rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'], rqtl_output['results'] @@ -66,12 +69,12 @@ def write_phenotype_file(trait_name: str, samples: List[str], vals: List, dataset_ob, - cofactors: Optional[str] = None) -> TextIO: + cofactors: Optional[str] = None, + perm_strata_list: Optional[List] = None) -> TextIO: """Given trait name, sample list, value list, dataset ob, and optional string representing cofactors, return the file's full path/name """ - cofactor_data = cofactors_to_dict(cofactors, dataset_ob, samples) pheno_file = io.StringIO() @@ -79,6 +82,8 @@ def write_phenotype_file(trait_name: str, header_row = ["Samples", trait_name] header_row += [cofactor for cofactor in cofactor_data] + if perm_strata_list: + header_row.append("Strata") writer.writerow(header_row) for i, sample in enumerate(samples): @@ -89,6 +94,8 @@ def write_phenotype_file(trait_name: str, this_row.append("NA") for cofactor in cofactor_data: this_row.append(cofactor_data[cofactor][i]) + if perm_strata_list: + this_row.append(perm_strata_list[i]) writer.writerow(this_row) hash_of_file = get_hash_of_textio(pheno_file) @@ -106,21 +113,21 @@ def cofactors_to_dict(cofactors: str, dataset_ob, samples) -> Dict: and list of samples, return cofactor data as a Dict """ - cofactors = {} + cofactor_dict = {} if cofactors: dataset_ob.group.get_samplelist() sample_list = dataset_ob.group.samplelist for cofactor in cofactors.split(","): cofactor_name, cofactor_dataset = cofactor.split(":") - if cofactor_dataset == this_dataset.name: - cofactors[cofactor_name] = [] + if cofactor_dataset == dataset_ob.name: + cofactor_dict[cofactor_name] = [] trait_ob = create_trait(dataset=dataset_ob, name=cofactor_name) sample_data = trait_ob.data for index, sample in enumerate(samples): if sample in sample_data: sample_value = sample_data[sample].value - cofactors[cofactor_name].append(sample_value) + cofactor_dict[cofactor_name].append(sample_value) else: - cofactors[cofactor_name].append("NA") - return cofactors + cofactor_dict[cofactor_name].append("NA") + return cofactor_dict -- cgit v1.2.3 From 6a54fa84542458fe8cfd288bf4c507214a1ddd04 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:08:14 +0000 Subject: Make all attribute names lowercase to deal with situations where sometimes they're lower-case and other times not --- wqflask/wqflask/marker_regression/run_mapping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index d727e3ff..c5b980a7 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -765,9 +765,9 @@ def get_perm_strata(this_trait, sample_list, categorical_vars, used_samples): if sample in list(sample_list.sample_attribute_values.keys()): combined_string = "" for var in categorical_vars: - if var in list(sample_list.sample_attribute_values[sample].keys()): + if var.lower() in sample_list.sample_attribute_values[sample]: combined_string += str( - sample_list.sample_attribute_values[sample][var]) + sample_list.sample_attribute_values[sample][var.lower()]) else: combined_string += "NA" else: -- cgit v1.2.3 From 2c2e1dc3d4be635dbf73aab380e22acca6a2014b Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:18:36 +0000 Subject: Improved test_rqtl_mapping.py with Bonface's recommendation of uses dataclasses --- .../wqflask/marker_regression/test_rqtl_mapping.py | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 6853c021..bd97b2d2 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -2,17 +2,17 @@ import unittest from unittest import mock from wqflask.marker_regression.rqtl_mapping import run_rqtl -class AttributeSetter: - def __init__(self, obj): - for key, val in obj.items(): - setattr(self, key, val) +@dataclass +class MockGroup: + name: str, + genofile: str -class MockGroup(AttributeSetter): - def get_samplelist(self): - return None +@dataclass +class MockDataset: + group: MockGroup class TestRqtlMapping(unittest.TestCase): - + """Tests for functions in rqtl_mapping.py""" @mock.patch("wqflask.marker_regression.rqtl_mapping.process_rqtl_results") @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results") @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post") @@ -22,15 +22,12 @@ class TestRqtlMapping(unittest.TestCase): """Test for run_rqtl with permutations > 0""" dataset_group = MockGroup( {"name": "GP1", "genofile": "file_geno"}) - - dataset = AttributeSetter({"group": dataset_group}) + dataset = MockDataset(dataset_group) mock_write_pheno_file.return_value = "pheno_filename" mock_locate.return_value = "geno_filename" - mock_post.return_value = {"output_file": "output_filename", "rqtl_cmd": "the_command"} - mock_process_perm.return_value = [[], 3, 4] mock_process_rqtl.return_value = [] @@ -44,4 +41,4 @@ class TestRqtlMapping(unittest.TestCase): mock_post.assert_called_once() mock_process_perm.assert_called_once() mock_process_rqtl.assert_called_once() - self.assertEqual(results, ([], 3, 4, [])) \ No newline at end of file + self.assertEqual(results, ([], 3, 4, [])) -- cgit v1.2.3 From 37941d2724aaf22c1aeca18a77e4c17248c5b7bc Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:27:26 +0000 Subject: Updated test_run_mapping.py to account for attribute keys being checked as lowercase, though not sure if this will fully fix the test --- .../unit/wqflask/marker_regression/test_run_mapping.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py index 78cd3be9..c220a072 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_run_mapping.py @@ -229,20 +229,20 @@ class TestRunMapping(unittest.TestCase): used_samples = ["S1", "S2"] sample_list = AttributeSetter({"sample_attribute_values": { "S1": { - "C1": "c1_value", - "C2": "c2_value", - "W1": "w1_value" + "c1": "c1_value", + "c2": "c2_value", + "w1": "w1_value" }, "S2": { - "W1": "w2_value", - "W2": "w2_value" + "w1": "w2_value", + "w2": "w2_value" }, "S3": { - "C1": "c1_value", - "C2": "c2_value" + "c1": "c1_value", + "c2": "c2_value" }, -- cgit v1.2.3 From 09768ed9c255cdd10561eeba28bf3752747bf378 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:29:57 +0000 Subject: Removed parts of test_rqtl_mapping.py referring to process_perm_results and process_rqtl_results since that functionality was moved to GN3 --- .../tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index bd97b2d2..00a05d2d 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -13,12 +13,10 @@ class MockDataset: class TestRqtlMapping(unittest.TestCase): """Tests for functions in rqtl_mapping.py""" - @mock.patch("wqflask.marker_regression.rqtl_mapping.process_rqtl_results") - @mock.patch("wqflask.marker_regression.rqtl_mapping.process_perm_results") @mock.patch("wqflask.marker_regression.rqtl_mapping.requests.post") @mock.patch("wqflask.marker_regression.rqtl_mapping.locate") @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file") - def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post, mock_process_perm, mock_process_rqtl): + def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post): """Test for run_rqtl with permutations > 0""" dataset_group = MockGroup( {"name": "GP1", "genofile": "file_geno"}) @@ -28,8 +26,6 @@ class TestRqtlMapping(unittest.TestCase): mock_locate.return_value = "geno_filename" mock_post.return_value = {"output_file": "output_filename", "rqtl_cmd": "the_command"} - mock_process_perm.return_value = [[], 3, 4] - mock_process_rqtl.return_value = [] results = run_rqtl(trait_name="the_trait", vals=[], samples=[], dataset=dataset, mapping_scale="cM", model="normal", method="hk", @@ -39,6 +35,5 @@ class TestRqtlMapping(unittest.TestCase): mock_write_pheno_file.assert_called_once() mock_locate.assert_called_once() mock_post.assert_called_once() - mock_process_perm.assert_called_once() - mock_process_rqtl.assert_called_once() + self.assertEqual(results, ([], 3, 4, [])) -- cgit v1.2.3 From 0cc4376462b7f136e59d99dba1975d6023326126 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:32:58 +0000 Subject: Fixed syntax mistake when creating dataclass in test_rqtl_mapping.py --- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 00a05d2d..31a2c9e4 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -4,7 +4,7 @@ from wqflask.marker_regression.rqtl_mapping import run_rqtl @dataclass class MockGroup: - name: str, + name: str genofile: str @dataclass -- cgit v1.2.3 From 97df46292e8322f01466d36b1e1e3ecdad64e5c0 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:50:03 +0000 Subject: Forgot to import dataclass --- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 31a2c9e4..ff451230 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -1,5 +1,7 @@ import unittest from unittest import mock +from dataclasses import dataclass + from wqflask.marker_regression.rqtl_mapping import run_rqtl @dataclass -- cgit v1.2.3 From 767ff7e97a751f164da610b2f9c536b6660ec420 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 20:53:22 +0000 Subject: Fixed way MockGroup was initialized --- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index ff451230..a7f708e1 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -20,8 +20,7 @@ class TestRqtlMapping(unittest.TestCase): @mock.patch("wqflask.marker_regression.rqtl_mapping.write_phenotype_file") def test_run_rqtl_with_perm(self, mock_write_pheno_file, mock_locate, mock_post): """Test for run_rqtl with permutations > 0""" - dataset_group = MockGroup( - {"name": "GP1", "genofile": "file_geno"}) + dataset_group = MockGroup("GP1", "file_geno") dataset = MockDataset(dataset_group) mock_write_pheno_file.return_value = "pheno_filename" -- cgit v1.2.3 From f69eb61c5f58b008233f7bee0e551fe9151c7724 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 21:09:18 +0000 Subject: Change test_rqtl_mapping.py to account for full results being returned from the GN3 request --- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index a7f708e1..9a5162ae 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -25,8 +25,11 @@ class TestRqtlMapping(unittest.TestCase): mock_write_pheno_file.return_value = "pheno_filename" mock_locate.return_value = "geno_filename" - mock_post.return_value = {"output_file": "output_filename", - "rqtl_cmd": "the_command"} + mock_post.return_value = Mock(ok=True) + mock_post.return_value.json.return_value = {"perm_results": [], + "suggestive": 3, + "significant": 4, + "results" : []} results = run_rqtl(trait_name="the_trait", vals=[], samples=[], dataset=dataset, mapping_scale="cM", model="normal", method="hk", -- cgit v1.2.3 From e78c84aa10849465e0272daa6d94bfbd3419b072 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 27 May 2021 21:11:16 +0000 Subject: Fix the way Mock is initialized --- wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask') diff --git a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py index 9a5162ae..9d13e943 100644 --- a/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py +++ b/wqflask/tests/unit/wqflask/marker_regression/test_rqtl_mapping.py @@ -25,7 +25,7 @@ class TestRqtlMapping(unittest.TestCase): mock_write_pheno_file.return_value = "pheno_filename" mock_locate.return_value = "geno_filename" - mock_post.return_value = Mock(ok=True) + mock_post.return_value = mock.Mock(ok=True) mock_post.return_value.json.return_value = {"perm_results": [], "suggestive": 3, "significant": 4, -- cgit v1.2.3