diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/ctl_analysis.R | 217 | ||||
-rw-r--r-- | scripts/laminar/gn3-lint.sh | 4 | ||||
-rw-r--r-- | scripts/laminar/gn3-mypy.sh | 4 | ||||
-rw-r--r-- | scripts/laminar/gn3-unittest.sh | 4 | ||||
-rwxr-xr-x | scripts/partial_correlations.py | 59 | ||||
-rw-r--r-- | scripts/rqtl_wrapper.R | 20 | ||||
-rw-r--r-- | scripts/wgcna_analysis.R | 7 |
7 files changed, 300 insertions, 15 deletions
diff --git a/scripts/ctl_analysis.R b/scripts/ctl_analysis.R new file mode 100644 index 0000000..e06ac25 --- /dev/null +++ b/scripts/ctl_analysis.R @@ -0,0 +1,217 @@ + +library(ctl) +library(stringi); +library(rjson) + +options(stringsAsFactors = FALSE); + +args = commandArgs(trailingOnly=TRUE) + +imgDir = Sys.getenv("GENERATED_IMAGE_DIR") + +if (length(args)==0) { + stop("Argument for the data file", call.=FALSE) +} else { + + json_file_path = args[1] +} + +json_file_path +# add validation for the files +input <- fromJSON(file = json_file_path) + + +cat("The input data is \n") + + +genoData <- input$genoData +phenoData <- input$phenoData + +parametric <- switch( + input$parametric, + "True" = TRUE, + "False" = FALSE + + ) +# create the matixes + +# genotypes Matrix of genotypes. (individuals x markers) +# phenotypes Matrix of phenotypes. (individuals x phenotypes) + +geno_matrix = t(matrix(unlist(genoData$genotypes), + nrow=length(genoData$markernames), ncol=length(genoData$individuals), + dimnames=list(genoData$markernames, genoData$individuals), byrow=TRUE)) + + +pheno_matrix = t(matrix(as.numeric(unlist(phenoData$traits)), nrow=length(phenoData$trait_db_list), ncol=length( + phenoData$individuals), dimnames= list(phenoData$trait_db_list, phenoData$individuals), byrow=TRUE)) + +ctls <- CTLscan(geno_matrix,pheno_matrix,nperm=input$nperm,strategy=input$strategy,parametric=parametric,nthreads=3,verbose=TRUE) + +genImageRandStr <- function(prefix){ + + randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") + + return(paste(randStr,".png",sep="")) +} + + +genRandomFileName <- function(prefix,file_ext=".png"){ + + randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") + + return(paste(randStr,file_ext,sep="")) +} + + +# #output matrix significant CTL interactions with 4 columns: trait, marker, trait, lod +ctl_significant <- CTLsignificant(ctls,significance = input$significance) + +colnames(ctl_significant) = c("trait","marker","trait_2","LOD","dcor") + + + +imageLoc = file.path(input$imgDir,genRandomFileName("CTLline")) + +png(imageLoc,width=1000,height=600,type='cairo-png') + +# Create the lineplot +ctl.lineplot(ctls,significance = input$significance, gap = 50, +col = "orange", bg.col = "lightgray", cex = 1, verbose = FALSE) + +dev.off() + + +n = 2 +ctl_plots = c() + +for (trait in phenoData$trait_db_list) +{ + image_loc = file.path(input$imgDir,genRandomFileName(paste("CTL",n,sep=""))) + png(image_loc,width=1000, height=600, type='cairo-png') + plot.CTLobject(ctls,n-1,significance= input$significance, main=paste("Phenotype",trait,sep="")) + + ctl_plots = append(ctl_plots,image_loc) + + dev.off() + n = n + 1 + +} + + + +network_file_name = paste("ctlnet",stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") +netfile = file.path(input$imgDir,paste(network_file_name,".sif",sep="")) + +nodefile = file.path(input$imgDir,paste(network_file_name,".nodes",sep="")) + +# fn overrides ctlnetwork function to target gn2 use case + +CTLnetworkGn<- function(CTLobject, mapinfo, significance = 0.05, LODdrop = 2, what = c("names","ids"), short = FALSE, add.qtls = FALSE,verbose = TRUE){ + if(missing(CTLobject) || is.null(CTLobject)) stop("argument 'CTLobject' is missing, with no default") + if("CTLscan" %in% class(CTLobject)) CTLobject = list(CTLobject) + if(length(what) > 1) what = what[1] + + results <- NULL + significant <- CTLsignificant(CTLobject, significance, what = "ids") + if(!is.null(significant)){ + all_m <- NULL; all_p <- NULL; + + + cat("",file=netfile); cat("",file=nodefile); + if(verbose) cat("NETWORK.SIF\n") + edges <- NULL + for(x in 1:nrow(significant)){ + data <- as.numeric(significant[x,]) + CTLscan <- CTLobject[[data[1]]] + markern <- rownames(CTLscan$dcor) + traitsn <- colnames(CTLscan$dcor) + name <- ctl.name(CTLscan) + if(what=="ids"){ + tid <- which(traitsn %in% ctl.name(CTLobject[[data[1]]])) + name <- paste("P",tid,sep="") + markern <- paste("M",1:nrow(CTLobject[[data[1]]]$dcor), sep="") + traitsn <- paste("P", 1:ncol(CTLobject[[data[1]]]$dcor), sep="") + } + if(add.qtls){ # Add QTL to the output SIF + bfc <- length(CTLscan$qtl) + above <- which(CTLscan$qtl > -log10(significance)) + qtlnms <- names(above); qtlmid <- 1 + for(m in above){ + cat(name,"\tQTL\t",markern[m],"\tQTL\t",CTLscan$qtl[m],"\n",sep="",file=netfile,append=TRUE) + all_m <- CTLnetwork.addmarker(all_m, mapinfo, markern[data[2]], qtlnms[qtlmid]) + qtlmid <- qtlmid+1 + } + } + lod <- CTLscan$ctl[data[2],data[3]] + qlod1 <- CTLscan$qtl[data[2]] + qlod2 <- qlod1 + edgetype <- NA + if(length(CTLobject) >= data[3]){ # Edge type based on QTL LOD scores + qlod2 <- CTLobject[[data[3]]]$qtl[data[2]] + if((qlod1-qlod2) > LODdrop){ + edgetype <- 1 + }else if((qlod1-qlod2) < -LODdrop){ + edgetype <- -1 + }else{ edgetype <- 0; } + } else { + cat("Warning: Phenotype", data[3], "from", data[1], "no CTL/QTL information\n") + qlod2 <- NA; + } + #Store the results + results <- rbind(results, c(data[1], data[2], data[3], lod, edgetype, qlod1, qlod2)) + + if(nodefile == "" && !verbose){ }else{ + if(short){ + edge <- paste(name,traitsn[data[3]]) + edgeI <- paste(traitsn[data[3]],name) + if(!edge %in% edges && !edgeI %in% edges){ + cat(name, "\t", markern[data[2]],"\t", traitsn[data[3]],"\n",file=netfile, append=TRUE,sep="") + edges <- c(edges,edge) + } + }else{ + cat(name, "\t", "CTL_", data[1],"_",data[3], "\t", markern[data[2]],file=netfile, append=TRUE,sep="") + cat("\tCTL\t", lod, "\n", file=netfile, append=TRUE,sep="") + cat(markern[data[2]], "\t", "CTL_", data[1],"_",data[3], "\t",file=netfile, append=TRUE,sep="") + cat(traitsn[data[3]],"\tCTL\t", lod, "\n", file=netfile,append=TRUE,sep="") + } + } + all_m <- CTLnetwork.addmarker(all_m, mapinfo, markern[data[2]], rownames(CTLscan$dcor)[data[2]]) + all_p <- unique(c(all_p, name, traitsn[data[3]])) + } + colnames(results) <- c("TRAIT1","MARKER","TRAIT2","LOD_C","CAUSAL","LOD_T1","LOD_T2") + if(verbose) cat("NODE.DESCRIPTION\n") + if(nodefile == "" && !verbose){ }else{ + for(m in all_m){ cat(m,"\n", sep="", file=nodefile, append=TRUE); } + for(p in all_p){ cat(p,"\tPHENOTYPE\n", sep="", file=nodefile, append=TRUE); } + } + } + if(!is.null(results)){ + class(results) <- c(class(results),"CTLnetwork") + } + invisible(results) +} + +CTLnetwork.addmarker <- function(markers, mapinfo, name, realname){ + if(!missing(mapinfo)){ + id <- which(rownames(mapinfo) %in% realname) + fname <- paste(name,"\tMARKER\t",mapinfo[id,1],"\t",mapinfo[id,2],sep="") + markers <- unique(c(markers, fname)) + } + return(markers) +} + + +# generating network + + +ctl_network = CTLnetworkGn(ctls, significance = input$significance, LODdrop = 2,short = FALSE, add.qtls = FALSE, verbose = TRUE) + + + +json_data <- list(phenotypes = input$phenoData$trait_db_list,significance_data = ctl_significant,image_loc = imageLoc,ctl_plots=ctl_plots,network_file_name = network_file_name) + +json_data <- toJSON(json_data) + +write(json_data,file= json_file_path) + diff --git a/scripts/laminar/gn3-lint.sh b/scripts/laminar/gn3-lint.sh index b6f0c89..1299a96 100644 --- a/scripts/laminar/gn3-lint.sh +++ b/scripts/laminar/gn3-lint.sh @@ -4,7 +4,7 @@ set -e # Abort on first error CUR_DIR=$PWD GN3_CI_DIR=$HOME/CI/genenetwork3/ -cd $GN3_CI_DIR +cd "${GN3_CI_DIR}" git pull # Run Pylint @@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \ guix environment --load=guix.scm -- pylint sheepdog/worker.py gn3/ tests echo Done Running Pylint! -cd $CUR_DIR +cd "${CUR_DIR}" diff --git a/scripts/laminar/gn3-mypy.sh b/scripts/laminar/gn3-mypy.sh index a2a9782..6d04c35 100644 --- a/scripts/laminar/gn3-mypy.sh +++ b/scripts/laminar/gn3-mypy.sh @@ -4,7 +4,7 @@ set -e # Abort on first error CUR_DIR=$PWD GN3_CI_DIR=$HOME/CI/genenetwork3/ -cd $GN3_CI_DIR +cd "${GN3_CI_DIR}" git pull # Run Pylint @@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \ guix environment --load=guix.scm -- mypy . echo Done Running MyPy! -cd $CUR_DIR +cd "${CUR_DIR}" diff --git a/scripts/laminar/gn3-unittest.sh b/scripts/laminar/gn3-unittest.sh index 41dafe5..d18d5de 100644 --- a/scripts/laminar/gn3-unittest.sh +++ b/scripts/laminar/gn3-unittest.sh @@ -4,7 +4,7 @@ set -e # Abort on first error CUR_DIR=$PWD GN3_CI_DIR=$HOME/CI/genenetwork3/ -cd $GN3_CI_DIR +cd "${GN3_CI_DIR}" git pull # Run Pylint @@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \ guix environment --load=guix.scm -- python -m unittest discover echo Done Running Unittests! -cd $CUR_DIR +cd "${CUR_DIR}" diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py new file mode 100755 index 0000000..f203daa --- /dev/null +++ b/scripts/partial_correlations.py @@ -0,0 +1,59 @@ +import sys +import json +import traceback +from argparse import ArgumentParser + +from gn3.db_utils import database_connector +from gn3.responses.pcorrs_responses import OutputEncoder +from gn3.computations.partial_correlations import partial_correlations_entry + +def process_cli_arguments(): + parser = ArgumentParser() + parser.add_argument( + "primary_trait", + help="The primary trait's full name", + type=str) + parser.add_argument( + "control_traits", + help="A comma-separated list of traits' full names", + type=str) + parser.add_argument( + "method", + help="The correlation method to use", + type=str) + parser.add_argument( + "target_database", + help="The target database to run the partial correlations against", + type=str) + parser.add_argument( + "--criteria", + help="Number of results to return", + type=int, default=500) + return parser.parse_args() + +def cleanup_string(the_str): + return the_str.strip('"\t\n\r ') + +def run_partial_corrs(args): + with database_connector() as conn: + try: + return partial_correlations_entry( + conn, cleanup_string(args.primary_trait), + tuple(cleanup_string(args.control_traits).split(",")), + cleanup_string(args.method), args.criteria, + cleanup_string(args.target_database)) + except Exception as exc: + print(traceback.format_exc(), file=sys.stderr) + return { + "status": "exception", + "message": traceback.format_exc() + } + +def enter(): + args = process_cli_arguments() + print(json.dumps( + run_partial_corrs(process_cli_arguments()), + cls = OutputEncoder)) + +if __name__ == "__main__": + enter() diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R index eb660b5..ea2c345 100644 --- a/scripts/rqtl_wrapper.R +++ b/scripts/rqtl_wrapper.R @@ -172,7 +172,10 @@ verbose_print('Generating cross object\n') cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type) # Calculate genotype probabilities -if (!is.null(opt$interval)) { +if (!is.null(opt$pairscan)) { + verbose_print('Calculating genotype probabilities for pair-scan\n') + cross_object <- calc.genoprob(cross_object, step=10) +} else if (!is.null(opt$interval)) { verbose_print('Calculating genotype probabilities with interval mapping\n') cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max") } else if (!is.null(opt$pairscan)) { @@ -207,14 +210,19 @@ if (!is.null(opt$addcovar)) { name <- paste0("T_", covarDescr[x, 1]) # The covar description file doesn't have T_ in trait names (the cross object does) type <- covarDescr[x, 2] if(type == "categorical"){ - if(length(table(covars[,name])) > 2){ # More then 2 levels create the model matrix for the factor - mdata <- data.frame(toExpand = as.factor(covars[, name])) + verbose_print('Binding covars to covars\n') + if (nrow(covarDescr) < 2){ + this_covar = covars + } else { + this_covar = covars[,name] + } + if(length(table(this_covar)) > 2){ # More then 2 levels create the model matrix for the factor + mdata <- data.frame(toExpand = as.factor(this_covar)) options(na.action='na.pass') modelmatrix <- model.matrix(~ toExpand + 0, mdata)[,-1] covars <- cbind(covars, modelmatrix) }else{ # 2 levels? just bind the trait as covar - verbose_print('Binding covars to covars\n') - covars <- cbind(covars, covars[,name]) + covars <- cbind(covars, this_covar) } } } @@ -236,10 +244,12 @@ if (!is.null(opt$control)) { } if (!is.null(opt$pairscan)) { + verbose_print("Running scantwo") scan_func <- function(...){ scantwo(...) } } else { + verbose_print("Running scanone") scan_func <- function(...){ scanone(...) } diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index b0d25a9..d368013 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -25,6 +25,7 @@ if (length(args)==0) { inputData <- fromJSON(file = json_file_path) imgDir = inputData$TMPDIR +inputData trait_sample_data <- do.call(rbind, inputData$trait_sample_data) @@ -51,10 +52,8 @@ names(dataExpr) = inputData$trait_names # Allow multi-threading within WGCNA enableWGCNAThreads() -# choose softthreshhold (Calculate soft threshold) -# xtodo allow users to pass args - -powers <- c(c(1:10), seq(from = 12, to=20, by=2)) +# powers <- c(c(1:10), seq(from = 12, to=20, by=2)) +powers <- unlist(c(inputData$SoftThresholds)) sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) # check the power estimate |