aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/ctl_analysis.R217
-rw-r--r--scripts/laminar/gn3-lint.sh4
-rw-r--r--scripts/laminar/gn3-mypy.sh4
-rw-r--r--scripts/laminar/gn3-unittest.sh4
-rwxr-xr-xscripts/partial_correlations.py59
-rw-r--r--scripts/rqtl_wrapper.R20
-rw-r--r--scripts/wgcna_analysis.R7
7 files changed, 300 insertions, 15 deletions
diff --git a/scripts/ctl_analysis.R b/scripts/ctl_analysis.R
new file mode 100644
index 0000000..e06ac25
--- /dev/null
+++ b/scripts/ctl_analysis.R
@@ -0,0 +1,217 @@
+
+library(ctl)
+library(stringi);
+library(rjson)
+
+options(stringsAsFactors = FALSE);
+
+args = commandArgs(trailingOnly=TRUE)
+
+imgDir = Sys.getenv("GENERATED_IMAGE_DIR")
+
+if (length(args)==0) {
+ stop("Argument for the data file", call.=FALSE)
+} else {
+
+ json_file_path = args[1]
+}
+
+json_file_path
+# add validation for the files
+input <- fromJSON(file = json_file_path)
+
+
+cat("The input data is \n")
+
+
+genoData <- input$genoData
+phenoData <- input$phenoData
+
+parametric <- switch(
+ input$parametric,
+ "True" = TRUE,
+ "False" = FALSE
+
+ )
+# create the matixes
+
+# genotypes Matrix of genotypes. (individuals x markers)
+# phenotypes Matrix of phenotypes. (individuals x phenotypes)
+
+geno_matrix = t(matrix(unlist(genoData$genotypes),
+ nrow=length(genoData$markernames), ncol=length(genoData$individuals),
+ dimnames=list(genoData$markernames, genoData$individuals), byrow=TRUE))
+
+
+pheno_matrix = t(matrix(as.numeric(unlist(phenoData$traits)), nrow=length(phenoData$trait_db_list), ncol=length(
+ phenoData$individuals), dimnames= list(phenoData$trait_db_list, phenoData$individuals), byrow=TRUE))
+
+ctls <- CTLscan(geno_matrix,pheno_matrix,nperm=input$nperm,strategy=input$strategy,parametric=parametric,nthreads=3,verbose=TRUE)
+
+genImageRandStr <- function(prefix){
+
+ randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+
+ return(paste(randStr,".png",sep=""))
+}
+
+
+genRandomFileName <- function(prefix,file_ext=".png"){
+
+ randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+
+ return(paste(randStr,file_ext,sep=""))
+}
+
+
+# #output matrix significant CTL interactions with 4 columns: trait, marker, trait, lod
+ctl_significant <- CTLsignificant(ctls,significance = input$significance)
+
+colnames(ctl_significant) = c("trait","marker","trait_2","LOD","dcor")
+
+
+
+imageLoc = file.path(input$imgDir,genRandomFileName("CTLline"))
+
+png(imageLoc,width=1000,height=600,type='cairo-png')
+
+# Create the lineplot
+ctl.lineplot(ctls,significance = input$significance, gap = 50,
+col = "orange", bg.col = "lightgray", cex = 1, verbose = FALSE)
+
+dev.off()
+
+
+n = 2
+ctl_plots = c()
+
+for (trait in phenoData$trait_db_list)
+{
+ image_loc = file.path(input$imgDir,genRandomFileName(paste("CTL",n,sep="")))
+ png(image_loc,width=1000, height=600, type='cairo-png')
+ plot.CTLobject(ctls,n-1,significance= input$significance, main=paste("Phenotype",trait,sep=""))
+
+ ctl_plots = append(ctl_plots,image_loc)
+
+ dev.off()
+ n = n + 1
+
+}
+
+
+
+network_file_name = paste("ctlnet",stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+netfile = file.path(input$imgDir,paste(network_file_name,".sif",sep=""))
+
+nodefile = file.path(input$imgDir,paste(network_file_name,".nodes",sep=""))
+
+# fn overrides ctlnetwork function to target gn2 use case
+
+CTLnetworkGn<- function(CTLobject, mapinfo, significance = 0.05, LODdrop = 2, what = c("names","ids"), short = FALSE, add.qtls = FALSE,verbose = TRUE){
+ if(missing(CTLobject) || is.null(CTLobject)) stop("argument 'CTLobject' is missing, with no default")
+ if("CTLscan" %in% class(CTLobject)) CTLobject = list(CTLobject)
+ if(length(what) > 1) what = what[1]
+
+ results <- NULL
+ significant <- CTLsignificant(CTLobject, significance, what = "ids")
+ if(!is.null(significant)){
+ all_m <- NULL; all_p <- NULL;
+
+
+ cat("",file=netfile); cat("",file=nodefile);
+ if(verbose) cat("NETWORK.SIF\n")
+ edges <- NULL
+ for(x in 1:nrow(significant)){
+ data <- as.numeric(significant[x,])
+ CTLscan <- CTLobject[[data[1]]]
+ markern <- rownames(CTLscan$dcor)
+ traitsn <- colnames(CTLscan$dcor)
+ name <- ctl.name(CTLscan)
+ if(what=="ids"){
+ tid <- which(traitsn %in% ctl.name(CTLobject[[data[1]]]))
+ name <- paste("P",tid,sep="")
+ markern <- paste("M",1:nrow(CTLobject[[data[1]]]$dcor), sep="")
+ traitsn <- paste("P", 1:ncol(CTLobject[[data[1]]]$dcor), sep="")
+ }
+ if(add.qtls){ # Add QTL to the output SIF
+ bfc <- length(CTLscan$qtl)
+ above <- which(CTLscan$qtl > -log10(significance))
+ qtlnms <- names(above); qtlmid <- 1
+ for(m in above){
+ cat(name,"\tQTL\t",markern[m],"\tQTL\t",CTLscan$qtl[m],"\n",sep="",file=netfile,append=TRUE)
+ all_m <- CTLnetwork.addmarker(all_m, mapinfo, markern[data[2]], qtlnms[qtlmid])
+ qtlmid <- qtlmid+1
+ }
+ }
+ lod <- CTLscan$ctl[data[2],data[3]]
+ qlod1 <- CTLscan$qtl[data[2]]
+ qlod2 <- qlod1
+ edgetype <- NA
+ if(length(CTLobject) >= data[3]){ # Edge type based on QTL LOD scores
+ qlod2 <- CTLobject[[data[3]]]$qtl[data[2]]
+ if((qlod1-qlod2) > LODdrop){
+ edgetype <- 1
+ }else if((qlod1-qlod2) < -LODdrop){
+ edgetype <- -1
+ }else{ edgetype <- 0; }
+ } else {
+ cat("Warning: Phenotype", data[3], "from", data[1], "no CTL/QTL information\n")
+ qlod2 <- NA;
+ }
+ #Store the results
+ results <- rbind(results, c(data[1], data[2], data[3], lod, edgetype, qlod1, qlod2))
+
+ if(nodefile == "" && !verbose){ }else{
+ if(short){
+ edge <- paste(name,traitsn[data[3]])
+ edgeI <- paste(traitsn[data[3]],name)
+ if(!edge %in% edges && !edgeI %in% edges){
+ cat(name, "\t", markern[data[2]],"\t", traitsn[data[3]],"\n",file=netfile, append=TRUE,sep="")
+ edges <- c(edges,edge)
+ }
+ }else{
+ cat(name, "\t", "CTL_", data[1],"_",data[3], "\t", markern[data[2]],file=netfile, append=TRUE,sep="")
+ cat("\tCTL\t", lod, "\n", file=netfile, append=TRUE,sep="")
+ cat(markern[data[2]], "\t", "CTL_", data[1],"_",data[3], "\t",file=netfile, append=TRUE,sep="")
+ cat(traitsn[data[3]],"\tCTL\t", lod, "\n", file=netfile,append=TRUE,sep="")
+ }
+ }
+ all_m <- CTLnetwork.addmarker(all_m, mapinfo, markern[data[2]], rownames(CTLscan$dcor)[data[2]])
+ all_p <- unique(c(all_p, name, traitsn[data[3]]))
+ }
+ colnames(results) <- c("TRAIT1","MARKER","TRAIT2","LOD_C","CAUSAL","LOD_T1","LOD_T2")
+ if(verbose) cat("NODE.DESCRIPTION\n")
+ if(nodefile == "" && !verbose){ }else{
+ for(m in all_m){ cat(m,"\n", sep="", file=nodefile, append=TRUE); }
+ for(p in all_p){ cat(p,"\tPHENOTYPE\n", sep="", file=nodefile, append=TRUE); }
+ }
+ }
+ if(!is.null(results)){
+ class(results) <- c(class(results),"CTLnetwork")
+ }
+ invisible(results)
+}
+
+CTLnetwork.addmarker <- function(markers, mapinfo, name, realname){
+ if(!missing(mapinfo)){
+ id <- which(rownames(mapinfo) %in% realname)
+ fname <- paste(name,"\tMARKER\t",mapinfo[id,1],"\t",mapinfo[id,2],sep="")
+ markers <- unique(c(markers, fname))
+ }
+ return(markers)
+}
+
+
+# generating network
+
+
+ctl_network = CTLnetworkGn(ctls, significance = input$significance, LODdrop = 2,short = FALSE, add.qtls = FALSE, verbose = TRUE)
+
+
+
+json_data <- list(phenotypes = input$phenoData$trait_db_list,significance_data = ctl_significant,image_loc = imageLoc,ctl_plots=ctl_plots,network_file_name = network_file_name)
+
+json_data <- toJSON(json_data)
+
+write(json_data,file= json_file_path)
+
diff --git a/scripts/laminar/gn3-lint.sh b/scripts/laminar/gn3-lint.sh
index b6f0c89..1299a96 100644
--- a/scripts/laminar/gn3-lint.sh
+++ b/scripts/laminar/gn3-lint.sh
@@ -4,7 +4,7 @@ set -e # Abort on first error
CUR_DIR=$PWD
GN3_CI_DIR=$HOME/CI/genenetwork3/
-cd $GN3_CI_DIR
+cd "${GN3_CI_DIR}"
git pull
# Run Pylint
@@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \
guix environment --load=guix.scm -- pylint sheepdog/worker.py gn3/ tests
echo Done Running Pylint!
-cd $CUR_DIR
+cd "${CUR_DIR}"
diff --git a/scripts/laminar/gn3-mypy.sh b/scripts/laminar/gn3-mypy.sh
index a2a9782..6d04c35 100644
--- a/scripts/laminar/gn3-mypy.sh
+++ b/scripts/laminar/gn3-mypy.sh
@@ -4,7 +4,7 @@ set -e # Abort on first error
CUR_DIR=$PWD
GN3_CI_DIR=$HOME/CI/genenetwork3/
-cd $GN3_CI_DIR
+cd "${GN3_CI_DIR}"
git pull
# Run Pylint
@@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \
guix environment --load=guix.scm -- mypy .
echo Done Running MyPy!
-cd $CUR_DIR
+cd "${CUR_DIR}"
diff --git a/scripts/laminar/gn3-unittest.sh b/scripts/laminar/gn3-unittest.sh
index 41dafe5..d18d5de 100644
--- a/scripts/laminar/gn3-unittest.sh
+++ b/scripts/laminar/gn3-unittest.sh
@@ -4,7 +4,7 @@ set -e # Abort on first error
CUR_DIR=$PWD
GN3_CI_DIR=$HOME/CI/genenetwork3/
-cd $GN3_CI_DIR
+cd "${GN3_CI_DIR}"
git pull
# Run Pylint
@@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \
guix environment --load=guix.scm -- python -m unittest discover
echo Done Running Unittests!
-cd $CUR_DIR
+cd "${CUR_DIR}"
diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py
new file mode 100755
index 0000000..f203daa
--- /dev/null
+++ b/scripts/partial_correlations.py
@@ -0,0 +1,59 @@
+import sys
+import json
+import traceback
+from argparse import ArgumentParser
+
+from gn3.db_utils import database_connector
+from gn3.responses.pcorrs_responses import OutputEncoder
+from gn3.computations.partial_correlations import partial_correlations_entry
+
+def process_cli_arguments():
+ parser = ArgumentParser()
+ parser.add_argument(
+ "primary_trait",
+ help="The primary trait's full name",
+ type=str)
+ parser.add_argument(
+ "control_traits",
+ help="A comma-separated list of traits' full names",
+ type=str)
+ parser.add_argument(
+ "method",
+ help="The correlation method to use",
+ type=str)
+ parser.add_argument(
+ "target_database",
+ help="The target database to run the partial correlations against",
+ type=str)
+ parser.add_argument(
+ "--criteria",
+ help="Number of results to return",
+ type=int, default=500)
+ return parser.parse_args()
+
+def cleanup_string(the_str):
+ return the_str.strip('"\t\n\r ')
+
+def run_partial_corrs(args):
+ with database_connector() as conn:
+ try:
+ return partial_correlations_entry(
+ conn, cleanup_string(args.primary_trait),
+ tuple(cleanup_string(args.control_traits).split(",")),
+ cleanup_string(args.method), args.criteria,
+ cleanup_string(args.target_database))
+ except Exception as exc:
+ print(traceback.format_exc(), file=sys.stderr)
+ return {
+ "status": "exception",
+ "message": traceback.format_exc()
+ }
+
+def enter():
+ args = process_cli_arguments()
+ print(json.dumps(
+ run_partial_corrs(process_cli_arguments()),
+ cls = OutputEncoder))
+
+if __name__ == "__main__":
+ enter()
diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index eb660b5..ea2c345 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -172,7 +172,10 @@ verbose_print('Generating cross object\n')
cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type)
# Calculate genotype probabilities
-if (!is.null(opt$interval)) {
+if (!is.null(opt$pairscan)) {
+ verbose_print('Calculating genotype probabilities for pair-scan\n')
+ cross_object <- calc.genoprob(cross_object, step=10)
+} else if (!is.null(opt$interval)) {
verbose_print('Calculating genotype probabilities with interval mapping\n')
cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
} else if (!is.null(opt$pairscan)) {
@@ -207,14 +210,19 @@ if (!is.null(opt$addcovar)) {
name <- paste0("T_", covarDescr[x, 1]) # The covar description file doesn't have T_ in trait names (the cross object does)
type <- covarDescr[x, 2]
if(type == "categorical"){
- if(length(table(covars[,name])) > 2){ # More then 2 levels create the model matrix for the factor
- mdata <- data.frame(toExpand = as.factor(covars[, name]))
+ verbose_print('Binding covars to covars\n')
+ if (nrow(covarDescr) < 2){
+ this_covar = covars
+ } else {
+ this_covar = covars[,name]
+ }
+ if(length(table(this_covar)) > 2){ # More then 2 levels create the model matrix for the factor
+ mdata <- data.frame(toExpand = as.factor(this_covar))
options(na.action='na.pass')
modelmatrix <- model.matrix(~ toExpand + 0, mdata)[,-1]
covars <- cbind(covars, modelmatrix)
}else{ # 2 levels? just bind the trait as covar
- verbose_print('Binding covars to covars\n')
- covars <- cbind(covars, covars[,name])
+ covars <- cbind(covars, this_covar)
}
}
}
@@ -236,10 +244,12 @@ if (!is.null(opt$control)) {
}
if (!is.null(opt$pairscan)) {
+ verbose_print("Running scantwo")
scan_func <- function(...){
scantwo(...)
}
} else {
+ verbose_print("Running scanone")
scan_func <- function(...){
scanone(...)
}
diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R
index b0d25a9..d368013 100644
--- a/scripts/wgcna_analysis.R
+++ b/scripts/wgcna_analysis.R
@@ -25,6 +25,7 @@ if (length(args)==0) {
inputData <- fromJSON(file = json_file_path)
imgDir = inputData$TMPDIR
+inputData
trait_sample_data <- do.call(rbind, inputData$trait_sample_data)
@@ -51,10 +52,8 @@ names(dataExpr) = inputData$trait_names
# Allow multi-threading within WGCNA
enableWGCNAThreads()
-# choose softthreshhold (Calculate soft threshold)
-# xtodo allow users to pass args
-
-powers <- c(c(1:10), seq(from = 12, to=20, by=2))
+# powers <- c(c(1:10), seq(from = 12, to=20, by=2))
+powers <- unlist(c(inputData$SoftThresholds))
sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5)
# check the power estimate