From b815236123ff8e144bd84f349357a1852df95651 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 23 Sep 2021 19:17:34 +0000 Subject: Fix covariates as described by Danny --- scripts/rqtl_wrapper.R | 53 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'scripts') diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 7518175..20c0e06 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -9,6 +9,7 @@ option_list = list( make_option(c("-g", "--geno"), type="character", help=".geno file containing a dataset's genotypes"), make_option(c("-p", "--pheno"), type="character", help="File containing two columns - sample names and values"), make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"), + make_option(c("--covarstruct"), type="character", help="File detailing which covariates are categorical or numerical"), make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"), make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"), make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"), @@ -33,6 +34,20 @@ verbose_print <- function(...){ } } +adjustXprobs <- function(cross){ + sex <- getsex(cross)$sex + pr <- cross$geno[["X"]]$prob + stopifnot(!is.null(pr), !is.null(sex)) + + for(i in 1:ncol(pr)) { + pr[sex==0,i,3:4] <- 0 + pr[sex==1,i,1:2] <- 0 + pr[,i,] <- pr[,i,]/rowSums(pr[,i,]) + } + cross$geno[["X"]]$prob <- pr + invisible(cross) +} + if (is.null(opt$geno) || is.null(opt$pheno)){ print_help(opt_parser) stop("Both a genotype and phenotype file must be provided.", call.=FALSE) @@ -51,7 +66,7 @@ get_geno_code <- function(header, name = 'unk'){ return(trim(strsplit(header[mat],':')[[1]][2])) } -geno_to_csvr <- function(genotypes, trait_names, trait_vals, out, sex = NULL, +geno_to_csvr <- function(genotypes, trait_names, trait_vals, out, type, sex = NULL, mapping_scale = "Mb", verbose = FALSE){ # Assume a geno header is not longer than 40 lines header = readLines(genotypes, 40) @@ -148,8 +163,12 @@ for (i in 1:length(trait_names)) { trait_names[i] = paste("T_", this_trait, sep = "") } +# Get type of genotypes, since it needs to be checked before calc.genoprob +header = readLines(geno_file, 40) +type <- get_geno_code(header, 'type') + verbose_print('Generating cross object\n') -cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file) +cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type) # Calculate genotype probabilities if (!is.null(opt$interval)) { @@ -160,16 +179,40 @@ if (!is.null(opt$interval)) { cross_object <- calc.genoprob(cross_object) } +# If 4way, adjust X chromosome genotype probabilities +if (type == "4-way") { + verbose_print('Adjusting genotype probabilities for 4way cross') + cross_object <- adjustXprobs(cross_object) +} + # Pull covariates out of cross object, if they exist -covars = vector(mode = "list", length = length(trait_names) - 1) +covars <- c() # Holds the covariates which should be passed to R/qtl if (!is.null(opt$addcovar)) { - #If perm strata are being used, it'll be included as the final column in the phenotype file + # If perm strata are being used, it'll be included as the final column in the phenotype file if (!is.null(opt$pstrata)) { - covar_names = trait_names[3:length(trait_names) - 1] + covar_names = trait_names[2:(length(trait_names)-1)] } else { covar_names = trait_names[2:length(trait_names)] } covars <- pull.pheno(cross_object, covar_names) + # Read in the covar description file + covarDescr <- read.table(opt$covarstruct, sep="\t", header=FALSE) + for(x in 1:nrow(covarDescr)){ + cat(covarDescr[x, 1]) + name <- paste0("T_", covarDescr[x, 1]) # The covar description file doesn't have T_ in trait names (the cross object does) + type <- covarDescr[x, 2] + if(type == "categorical"){ + if(length(table(covars[,name])) > 2){ # More then 2 levels create the model matrix for the factor + mdata <- data.frame(toExpand = as.factor(covars[, name])) + options(na.action='na.pass') + modelmatrix <- model.matrix(~ toExpand + 0, mdata)[,-1] + covars <- cbind(covars, modelmatrix) + }else{ # 2 levels? just bind the trait as covar + verbose_print('Binding covars to covars\n') + covars <- cbind(covars, covars[,name]) + } + } + } } # Pull permutation strata out of cross object, if it is being used -- cgit v1.2.3 From baa801e81c5b0f7c7b30c6ca880a806a6bce54db Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 14 Oct 2021 18:03:50 +0000 Subject: Incorporated Danny's code for calculating QTL main effects into rqtl_wrapper.R --- scripts/rqtl_wrapper.R | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'scripts') diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 20c0e06..523888f 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -269,4 +269,47 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){ verbose_print('Running scanone\n') qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method) } + +#QTL main effects on adjusted longevity +getEffects <- function(sdata, gtsprob, marker = "1_24042124", model = "longevity ~ sex + site + cohort + treatment", trait = "longevity"){ + rownames(sdata) <- 1:nrow(sdata) + rownames(gtsprob) <- 1:nrow(gtsprob) + mp <- gtsprob[, grep(marker, colnames(gtsprob))] + gts <- unlist(lapply(lapply(lapply(apply(mp,1,function(x){which(x > 0.85)}),names), strsplit, ":"), function(x){ + if(length(x) > 0){ return(x[[1]][2]); }else{ return(NA) } + })) + + ismissing <- which(apply(sdata, 1, function(x){any(is.na(x))})) + if(length(ismissing) > 0){ + sdata <- sdata[-ismissing, ] + gts <- gts[-ismissing] + } + + mlm <- lm(as.formula(model), data = sdata) + pheAdj <- rep(NA, nrow(sdata)) + adj <- residuals(mlm) + mean(sdata[, trait]) + pheAdj[as.numeric(names(adj))] <- adj + means <- c(mean(pheAdj[which(gts == "AC")],na.rm=TRUE),mean(pheAdj[which(gts == "AD")],na.rm=TRUE),mean(pheAdj[which(gts == "BC")],na.rm=TRUE),mean(pheAdj[which(gts == "BD")],na.rm=TRUE)) + std <- function(x) sd(x,na.rm=TRUE)/sqrt(length(x)) + stderrs <- c(std(pheAdj[which(gts == "AC")]),std(pheAdj[which(gts == "AD")]),std(pheAdj[which(gts == "BC")]),std(pheAdj[which(gts == "BD")])) + paste0(round(means,0), " ± ", round(stderrs,2)) +} + +if (type == "4-way") { + verbose_print("Get phenotype name + genoprob + all phenotypes + models for 4-way crosses") + traitname <- colnames(pull.pheno(cross_object))[1] + gtsp <- pull.genoprob(cross_object) + allpheno <- pull.pheno(cross_object) + model <- paste0(traitname, " ~ ", paste0(covar_names, sep="", collapse=" + ")) + + meffects <- c() + verbose_print("Getting QTL main effects for 4-way crosses") + for(marker in rownames(qtl_results)){ + meff <- getEffects(allpheno, gtsp, marker = marker, model, trait = traitname) + meffects <- rbind(meffects, meff) + } + qtl_results <- cbind(data.frame(qtl_results[,1:3]), meffects) + colnames(qtl_results)[4:7] <- c("AC", "AD", "BC", "BD") +} + write.csv(qtl_results, out_file) -- cgit v1.2.3 From 77099cac68e8f4792bf54d8e1f7ce6f315bedfa7 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 14 Oct 2021 21:00:15 +0000 Subject: Fixed when model is set to account for situations with no covariates --- scripts/rqtl_wrapper.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index 523888f..b7a9ae0 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -300,7 +300,11 @@ if (type == "4-way") { traitname <- colnames(pull.pheno(cross_object))[1] gtsp <- pull.genoprob(cross_object) allpheno <- pull.pheno(cross_object) - model <- paste0(traitname, " ~ ", paste0(covar_names, sep="", collapse=" + ")) + if (!is.null(opt$addcovar)) { + model <- paste0(traitname, " ~ ", paste0(covar_names, sep="", collapse=" + ")) + } else { + model <- paste0(traitname, " ~ 1 ") + } meffects <- c() verbose_print("Getting QTL main effects for 4-way crosses") -- cgit v1.2.3 From 453570cbf22a92de72fa0401895c99a923831942 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 5 Oct 2021 15:14:25 +0300 Subject: script modification : add debug statements --- scripts/wgcna_analysis.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 17b3537..a5cbbe3 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -6,11 +6,15 @@ library(rjson) options(stringsAsFactors = FALSE); -imgDir = Sys.getenv("GENERATED_IMAGE_DIR") +cat("Running the wgcna analysis script\n") + # load expression data **assumes from json files row(traits)(columns info+samples) # pass the file_path as arg # pass the file path to read json data + + + args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -21,6 +25,7 @@ if (length(args)==0) { } inputData <- fromJSON(file = json_file_path) +imgDir = inputData$TMPDIR trait_sample_data <- do.call(rbind, inputData$trait_sample_data) @@ -83,6 +88,11 @@ network <- blockwiseModules(dataExpr, +cat("Generated network \n") + +network + + genImageRandStr <- function(prefix){ randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") @@ -95,6 +105,10 @@ mergedColors <- labels2colors(network$colors) imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) png(imageLoc,width=1000,height=600,type='cairo-png') + +cat("Generating the CLuster dendrogram\n") + + plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], "Module colors", dendroLabels = FALSE, hang = 0.03, -- cgit v1.2.3 From d1be5270b99959f99802a7704562eeaaeb504122 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 13 Oct 2021 07:29:24 +0300 Subject: minor fixes for sript enable annotations --- scripts/wgcna_analysis.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index a5cbbe3..b0d25a9 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -13,8 +13,6 @@ cat("Running the wgcna analysis script\n") # pass the file path to read json data - - args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -100,6 +98,7 @@ genImageRandStr <- function(prefix){ return(paste(randStr,".png",sep="")) } + mergedColors <- labels2colors(network$colors) imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) @@ -111,7 +110,7 @@ cat("Generating the CLuster dendrogram\n") plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], "Module colors", -dendroLabels = FALSE, hang = 0.03, +dendroLabels = NULL, hang = 0.03, addGuide = TRUE, guideHang = 0.05) -- cgit v1.2.3 From 8f036415975d6e224e5e94277997329c0f1fa159 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 29 Oct 2021 09:49:28 +0300 Subject: Feature/biweight reimplementation (#47) * add biweight reimplementation with pingouin * delete biweight scripts and tests * add python-pingouin to guix file * delete biweight paths * mypy fix:pingouin mising imports * pep8 formatting && pylint fixes--- gn3/computations/biweight.py | 27 ------------------ gn3/computations/correlations.py | 11 ++++---- gn3/settings.py | 3 -- guix.scm | 1 + mypy.ini | 3 ++ scripts/calculate_biweight.R | 43 ----------------------------- tests/unit/computations/test_biweight.py | 21 -------------- tests/unit/computations/test_correlation.py | 11 -------- 8 files changed, 9 insertions(+), 111 deletions(-) delete mode 100644 gn3/computations/biweight.py delete mode 100644 scripts/calculate_biweight.R delete mode 100644 tests/unit/computations/test_biweight.py (limited to 'scripts') diff --git a/gn3/computations/biweight.py b/gn3/computations/biweight.py deleted file mode 100644 index 7accd0c..0000000 --- a/gn3/computations/biweight.py +++ /dev/null @@ -1,27 +0,0 @@ -"""module contains script to call biweight midcorrelation in R""" -import subprocess - -from typing import List -from typing import Tuple - -from gn3.settings import BIWEIGHT_RSCRIPT - - -def calculate_biweight_corr(trait_vals: List, - target_vals: List, - path_to_script: str = BIWEIGHT_RSCRIPT, - command: str = "Rscript" - ) -> Tuple[float, float]: - """biweight function""" - - args_1 = ' '.join(str(trait_val) for trait_val in trait_vals) - args_2 = ' '.join(str(target_val) for target_val in target_vals) - cmd = [command, path_to_script] + [args_1] + [args_2] - - results = subprocess.check_output(cmd, universal_newlines=True) - try: - (corr_coeff, p_val) = tuple( - [float(y.strip()) for y in results.split()]) - return (corr_coeff, p_val) - except Exception as error: - raise error diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index bb13ff1..c930df0 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -8,7 +8,7 @@ from typing import Optional from typing import Callable import scipy.stats -from gn3.computations.biweight import calculate_biweight_corr +import pingouin as pg def map_shared_keys_to_values(target_sample_keys: List, @@ -102,11 +102,10 @@ package :not packaged in guix """ - try: - results = calculate_biweight_corr(x_val, y_val) - return results - except Exception as error: - raise error + results = pg.corr(x_val, y_val, method="bicor") + corr_coeff = results["r"].values[0] + p_val = results["p-val"].values[0] + return (corr_coeff, p_val) def filter_shared_sample_keys(this_samplelist, diff --git a/gn3/settings.py b/gn3/settings.py index d5f1d3c..e85eeff 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -22,9 +22,6 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False GN2_BASE_URL = "http://www.genenetwork.org/" -# biweight script -BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" - # wgcna script WGCNA_RSCRIPT = "wgcna_analysis.R" # qtlreaper command diff --git a/guix.scm b/guix.scm index d8b1596..81e8389 100644 --- a/guix.scm +++ b/guix.scm @@ -110,6 +110,7 @@ ("r-rjson" ,r-rjson) ("python-plotly" ,python-plotly) ("python-pandas" ,python-pandas) + ("python-pingouin" ,python-pingouin) ("rust-qtlreaper" ,rust-qtlreaper) ("python-flask-cors" ,python-flask-cors))) (build-system python-build-system) diff --git a/mypy.ini b/mypy.ini index 5d66812..a507703 100644 --- a/mypy.ini +++ b/mypy.ini @@ -11,3 +11,6 @@ ignore_missing_imports = True [mypy-ipfshttpclient.*] ignore_missing_imports = True + +[mypy-pingouin.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/scripts/calculate_biweight.R b/scripts/calculate_biweight.R deleted file mode 100644 index 8d8366e..0000000 --- a/scripts/calculate_biweight.R +++ /dev/null @@ -1,43 +0,0 @@ - -library(testthat) -library(WGCNA) - -arg_values <- commandArgs(trailingOnly = TRUE) -ParseArgs <- function(args){ - - trait_vals <- as.numeric(unlist(strsplit(args[1], split=" "))) - target_vals <- as.numeric(unlist(strsplit(args[2], split=" "))) - - return(list(trait_vals= c(trait_vals),target_vals = c(target_vals))) - -} -BiweightMidCorrelation <- function(trait_val,target_val){ - - results <-bicorAndPvalue(as.numeric(unlist(trait_val)),as.numeric(unlist(target_val))) - return ((c(c(results$bicor)[1],c(results$p)[1]))) - -} - - - -test_that("biweight results"),{ - vec_1 <- c(1,2,3,4) - vec_2 <- c(1,2,3,4) - - results <- BiweightMidCorrelation(vec_1,vec_2) - expect_equal(c(1.0,0.0),results) -} - - -test_that("parsing args "),{ - my_args <- c("1 2 3 4","5 6 7 8") - results <- ParseArgs(my_args) - - expect_equal(results[1],c(1,2,3,4)) - expect_equal(results[2],c(5,6,7,8)) -} - -parsed_values <- ParseArgs(arg_values) - - -cat(BiweightMidCorrelation(parsed_values[1],parsed_values[2])) \ No newline at end of file diff --git a/tests/unit/computations/test_biweight.py b/tests/unit/computations/test_biweight.py deleted file mode 100644 index ad404f1..0000000 --- a/tests/unit/computations/test_biweight.py +++ /dev/null @@ -1,21 +0,0 @@ -"""test for biweight script""" -from unittest import TestCase -from unittest import mock - -from gn3.computations.biweight import calculate_biweight_corr - - -class TestBiweight(TestCase): - """test class for biweight""" - - @mock.patch("gn3.computations.biweight.subprocess.check_output") - def test_calculate_biweight_corr(self, mock_check_output): - """test for calculate_biweight_corr func""" - mock_check_output.return_value = "0.1 0.5" - results = calculate_biweight_corr(command="Rscript", - path_to_script="./r_script.R", - trait_vals=[ - 1.2, 1.1, 1.9], - target_vals=[1.9, 0.4, 1.1]) - - self.assertEqual(results, (0.1, 0.5)) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index fc52ec1..96d9c6d 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -5,7 +5,6 @@ from unittest import mock from collections import namedtuple from gn3.computations.correlations import normalize_values -from gn3.computations.correlations import do_bicor from gn3.computations.correlations import compute_sample_r_correlation from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import filter_shared_sample_keys @@ -98,16 +97,6 @@ class TestCorrelation(TestCase): self.assertEqual(results, expected_results) - @mock.patch("gn3.computations.correlations.calculate_biweight_corr") - def test_bicor(self, mock_biweight): - """Test for doing biweight mid correlation """ - mock_biweight.return_value = (1.0, 0.0) - - results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6]) - - self.assertEqual(results, (1.0, 0.0) - ) - @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value") @mock.patch("gn3.computations.correlations.normalize_values") def test_compute_sample_r_correlation(self, norm_vals, compute_corr): -- cgit v1.2.3