about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/ctl_analysis.R217
-rw-r--r--scripts/laminar/gn3-lint.sh4
-rw-r--r--scripts/laminar/gn3-mypy.sh4
-rw-r--r--scripts/laminar/gn3-unittest.sh4
-rwxr-xr-xscripts/partial_correlations.py59
-rw-r--r--scripts/rqtl_wrapper.R20
-rw-r--r--scripts/wgcna_analysis.R7
7 files changed, 300 insertions, 15 deletions
diff --git a/scripts/ctl_analysis.R b/scripts/ctl_analysis.R
new file mode 100644
index 0000000..e06ac25
--- /dev/null
+++ b/scripts/ctl_analysis.R
@@ -0,0 +1,217 @@
+
+library(ctl)
+library(stringi);
+library(rjson)
+
+options(stringsAsFactors = FALSE);
+
+args = commandArgs(trailingOnly=TRUE)
+
+imgDir = Sys.getenv("GENERATED_IMAGE_DIR")
+
+if (length(args)==0) {
+  stop("Argument for the data file", call.=FALSE)
+} else {
+
+  json_file_path = args[1]
+}
+
+json_file_path
+# add validation for the files
+input <- fromJSON(file = json_file_path)
+
+
+cat("The input data is \n")
+
+
+genoData <- input$genoData
+phenoData <- input$phenoData
+
+parametric <- switch(
+  input$parametric,
+  "True" = TRUE,
+  "False" = FALSE
+
+  )  
+# create the matixes
+
+# genotypes Matrix of genotypes. (individuals x markers)
+# phenotypes Matrix of phenotypes. (individuals x phenotypes)
+
+geno_matrix = t(matrix(unlist(genoData$genotypes),
+	nrow=length(genoData$markernames), ncol=length(genoData$individuals),
+	dimnames=list(genoData$markernames, genoData$individuals), byrow=TRUE))
+
+
+pheno_matrix = t(matrix(as.numeric(unlist(phenoData$traits)), nrow=length(phenoData$trait_db_list), ncol=length(
+    phenoData$individuals), dimnames= list(phenoData$trait_db_list, phenoData$individuals), byrow=TRUE))
+
+ctls <- CTLscan(geno_matrix,pheno_matrix,nperm=input$nperm,strategy=input$strategy,parametric=parametric,nthreads=3,verbose=TRUE)
+
+genImageRandStr <- function(prefix){
+
+	randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+
+	return(paste(randStr,".png",sep=""))
+}
+
+
+genRandomFileName <- function(prefix,file_ext=".png"){
+
+	randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+
+	return(paste(randStr,file_ext,sep=""))
+}
+
+
+# #output matrix significant CTL interactions with 4 columns: trait, marker, trait, lod
+ctl_significant <- CTLsignificant(ctls,significance = input$significance)
+ 
+colnames(ctl_significant) = c("trait","marker","trait_2","LOD","dcor")
+
+
+
+imageLoc = file.path(input$imgDir,genRandomFileName("CTLline"))
+
+png(imageLoc,width=1000,height=600,type='cairo-png')
+
+# Create the lineplot
+ctl.lineplot(ctls,significance = input$significance, gap = 50, 
+col = "orange", bg.col = "lightgray", cex = 1, verbose = FALSE)
+
+dev.off()
+
+
+n = 2
+ctl_plots = c()
+
+for (trait in phenoData$trait_db_list)
+{
+	image_loc = file.path(input$imgDir,genRandomFileName(paste("CTL",n,sep="")))
+	png(image_loc,width=1000, height=600, type='cairo-png')
+  plot.CTLobject(ctls,n-1,significance= input$significance, main=paste("Phenotype",trait,sep=""))
+
+  ctl_plots = append(ctl_plots,image_loc)
+
+  dev.off()
+  n = n + 1
+
+}
+
+
+
+network_file_name = paste("ctlnet",stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+netfile =  file.path(input$imgDir,paste(network_file_name,".sif",sep=""))
+
+nodefile = file.path(input$imgDir,paste(network_file_name,".nodes",sep=""))
+
+# fn overrides ctlnetwork function to target gn2 use case
+
+CTLnetworkGn<- function(CTLobject, mapinfo, significance = 0.05, LODdrop = 2, what = c("names","ids"), short = FALSE, add.qtls = FALSE,verbose = TRUE){
+  if(missing(CTLobject) || is.null(CTLobject)) stop("argument 'CTLobject' is missing, with no default")
+  if("CTLscan" %in% class(CTLobject)) CTLobject = list(CTLobject)
+  if(length(what) > 1) what = what[1]
+
+  results <- NULL
+  significant <- CTLsignificant(CTLobject, significance, what = "ids")
+  if(!is.null(significant)){
+    all_m <- NULL; all_p <- NULL;
+
+
+    cat("",file=netfile); cat("",file=nodefile);
+    if(verbose) cat("NETWORK.SIF\n")
+    edges <- NULL
+    for(x in 1:nrow(significant)){
+      data    <- as.numeric(significant[x,])
+      CTLscan <- CTLobject[[data[1]]]
+      markern <- rownames(CTLscan$dcor)
+      traitsn <- colnames(CTLscan$dcor)
+      name    <- ctl.name(CTLscan)
+      if(what=="ids"){
+        tid     <- which(traitsn %in% ctl.name(CTLobject[[data[1]]]))
+        name    <- paste("P",tid,sep="")
+        markern <- paste("M",1:nrow(CTLobject[[data[1]]]$dcor), sep="")
+        traitsn <- paste("P", 1:ncol(CTLobject[[data[1]]]$dcor), sep="")
+      }
+      if(add.qtls){ # Add QTL to the output SIF
+        bfc    <- length(CTLscan$qtl)
+        above  <- which(CTLscan$qtl > -log10(significance))
+        qtlnms <- names(above); qtlmid <- 1
+        for(m in above){
+          cat(name,"\tQTL\t",markern[m],"\tQTL\t",CTLscan$qtl[m],"\n",sep="",file=netfile,append=TRUE)
+          all_m  <- CTLnetwork.addmarker(all_m, mapinfo, markern[data[2]], qtlnms[qtlmid])
+          qtlmid <- qtlmid+1
+        }
+      }
+      lod <- CTLscan$ctl[data[2],data[3]]
+      qlod1    <- CTLscan$qtl[data[2]]
+      qlod2    <- qlod1
+      edgetype <- NA
+      if(length(CTLobject) >= data[3]){  # Edge type based on QTL LOD scores
+        qlod2 <- CTLobject[[data[3]]]$qtl[data[2]]
+        if((qlod1-qlod2) > LODdrop){
+          edgetype <- 1
+        }else if((qlod1-qlod2) < -LODdrop){
+          edgetype <- -1
+        }else{ edgetype <- 0; }
+      } else { 
+        cat("Warning: Phenotype", data[3], "from", data[1], "no CTL/QTL information\n")
+        qlod2 <- NA; 
+      }
+      #Store the results
+      results <- rbind(results, c(data[1], data[2], data[3], lod, edgetype, qlod1, qlod2))
+
+      if(nodefile == "" && !verbose){ }else{
+        if(short){
+          edge <- paste(name,traitsn[data[3]])
+          edgeI <- paste(traitsn[data[3]],name)
+          if(!edge %in% edges && !edgeI %in% edges){
+            cat(name, "\t", markern[data[2]],"\t", traitsn[data[3]],"\n",file=netfile, append=TRUE,sep="")
+            edges <- c(edges,edge)
+          }
+        }else{
+          cat(name, "\t", "CTL_", data[1],"_",data[3], "\t", markern[data[2]],file=netfile, append=TRUE,sep="")
+          cat("\tCTL\t", lod, "\n", file=netfile, append=TRUE,sep="")
+          cat(markern[data[2]], "\t", "CTL_", data[1],"_",data[3], "\t",file=netfile, append=TRUE,sep="")
+          cat(traitsn[data[3]],"\tCTL\t", lod, "\n", file=netfile,append=TRUE,sep="")
+        }
+      }
+      all_m <- CTLnetwork.addmarker(all_m, mapinfo, markern[data[2]], rownames(CTLscan$dcor)[data[2]])
+      all_p <- unique(c(all_p, name, traitsn[data[3]]))
+    }
+    colnames(results) <- c("TRAIT1","MARKER","TRAIT2","LOD_C","CAUSAL","LOD_T1","LOD_T2")
+    if(verbose) cat("NODE.DESCRIPTION\n")
+    if(nodefile == "" && !verbose){ }else{
+      for(m in all_m){ cat(m,"\n",    sep="", file=nodefile, append=TRUE); }
+      for(p in all_p){ cat(p,"\tPHENOTYPE\n", sep="", file=nodefile, append=TRUE); }
+    }
+  }
+  if(!is.null(results)){
+    class(results) <- c(class(results),"CTLnetwork")
+  }
+  invisible(results)
+}
+
+CTLnetwork.addmarker <- function(markers, mapinfo, name, realname){
+  if(!missing(mapinfo)){
+    id      <- which(rownames(mapinfo) %in% realname)
+    fname   <- paste(name,"\tMARKER\t",mapinfo[id,1],"\t",mapinfo[id,2],sep="")
+    markers <- unique(c(markers, fname))
+  }
+  return(markers)
+}
+
+
+# generating network
+
+
+ctl_network = CTLnetworkGn(ctls, significance = input$significance, LODdrop = 2,short = FALSE, add.qtls = FALSE, verbose = TRUE)
+
+
+
+json_data <- list(phenotypes = input$phenoData$trait_db_list,significance_data = ctl_significant,image_loc = imageLoc,ctl_plots=ctl_plots,network_file_name =  network_file_name)
+
+json_data <- toJSON(json_data)
+
+write(json_data,file= json_file_path)
+
diff --git a/scripts/laminar/gn3-lint.sh b/scripts/laminar/gn3-lint.sh
index b6f0c89..1299a96 100644
--- a/scripts/laminar/gn3-lint.sh
+++ b/scripts/laminar/gn3-lint.sh
@@ -4,7 +4,7 @@ set -e  # Abort on first error
 CUR_DIR=$PWD
 GN3_CI_DIR=$HOME/CI/genenetwork3/
 
-cd $GN3_CI_DIR
+cd "${GN3_CI_DIR}"
 git pull
 
 # Run Pylint
@@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \
     guix environment --load=guix.scm -- pylint sheepdog/worker.py gn3/ tests
 
 echo Done Running Pylint!
-cd $CUR_DIR
+cd "${CUR_DIR}"
diff --git a/scripts/laminar/gn3-mypy.sh b/scripts/laminar/gn3-mypy.sh
index a2a9782..6d04c35 100644
--- a/scripts/laminar/gn3-mypy.sh
+++ b/scripts/laminar/gn3-mypy.sh
@@ -4,7 +4,7 @@ set -e  # Abort on first error
 CUR_DIR=$PWD
 GN3_CI_DIR=$HOME/CI/genenetwork3/
 
-cd $GN3_CI_DIR
+cd "${GN3_CI_DIR}"
 git pull
 
 # Run Pylint
@@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \
     guix environment --load=guix.scm -- mypy .
 
 echo Done Running MyPy!
-cd $CUR_DIR
+cd "${CUR_DIR}"
diff --git a/scripts/laminar/gn3-unittest.sh b/scripts/laminar/gn3-unittest.sh
index 41dafe5..d18d5de 100644
--- a/scripts/laminar/gn3-unittest.sh
+++ b/scripts/laminar/gn3-unittest.sh
@@ -4,7 +4,7 @@ set -e  # Abort on first error
 CUR_DIR=$PWD
 GN3_CI_DIR=$HOME/CI/genenetwork3/
 
-cd $GN3_CI_DIR
+cd "${GN3_CI_DIR}"
 git pull
 
 # Run Pylint
@@ -12,4 +12,4 @@ env GUIX_PACKAGE_PATH="$HOME/guix-bioinformatics:$HOME/guix-past/modules" \
     guix environment --load=guix.scm -- python -m unittest discover
 
 echo Done Running Unittests!
-cd $CUR_DIR
+cd "${CUR_DIR}"
diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py
new file mode 100755
index 0000000..f203daa
--- /dev/null
+++ b/scripts/partial_correlations.py
@@ -0,0 +1,59 @@
+import sys
+import json
+import traceback
+from argparse import ArgumentParser
+
+from gn3.db_utils import database_connector
+from gn3.responses.pcorrs_responses import OutputEncoder
+from gn3.computations.partial_correlations import partial_correlations_entry
+
+def process_cli_arguments():
+    parser = ArgumentParser()
+    parser.add_argument(
+        "primary_trait",
+        help="The primary trait's full name",
+        type=str)
+    parser.add_argument(
+        "control_traits",
+        help="A comma-separated list of traits' full names",
+        type=str)
+    parser.add_argument(
+        "method",
+        help="The correlation method to use",
+        type=str)
+    parser.add_argument(
+        "target_database",
+        help="The target database to run the partial correlations against",
+        type=str)
+    parser.add_argument(
+        "--criteria",
+        help="Number of results to return",
+        type=int, default=500)
+    return parser.parse_args()
+
+def cleanup_string(the_str):
+    return the_str.strip('"\t\n\r ')
+
+def run_partial_corrs(args):
+    with database_connector() as conn:
+        try:
+            return partial_correlations_entry(
+                conn, cleanup_string(args.primary_trait),
+                tuple(cleanup_string(args.control_traits).split(",")),
+                cleanup_string(args.method), args.criteria,
+                cleanup_string(args.target_database))
+        except Exception as exc:
+            print(traceback.format_exc(), file=sys.stderr)
+            return {
+                "status": "exception",
+                "message": traceback.format_exc()
+            }
+
+def enter():
+    args = process_cli_arguments()
+    print(json.dumps(
+        run_partial_corrs(process_cli_arguments()),
+        cls = OutputEncoder))
+
+if __name__ == "__main__":
+    enter()
diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index eb660b5..ea2c345 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -172,7 +172,10 @@ verbose_print('Generating cross object\n')
 cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file, type)
 
 # Calculate genotype probabilities
-if (!is.null(opt$interval)) {
+if (!is.null(opt$pairscan)) {
+  verbose_print('Calculating genotype probabilities for pair-scan\n')
+  cross_object <- calc.genoprob(cross_object, step=10)
+} else if (!is.null(opt$interval)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
 } else if (!is.null(opt$pairscan)) {
@@ -207,14 +210,19 @@ if (!is.null(opt$addcovar)) {
     name <- paste0("T_", covarDescr[x, 1]) # The covar description file doesn't have T_ in trait names (the cross object does)
     type <- covarDescr[x, 2]
     if(type == "categorical"){
-      if(length(table(covars[,name])) > 2){ # More then 2 levels create the model matrix for the factor
-        mdata <- data.frame(toExpand = as.factor(covars[, name]))
+      verbose_print('Binding covars to covars\n')
+      if (nrow(covarDescr) < 2){
+        this_covar = covars
+      } else {
+        this_covar = covars[,name]
+      }
+      if(length(table(this_covar)) > 2){ # More then 2 levels create the model matrix for the factor
+        mdata <- data.frame(toExpand = as.factor(this_covar))
         options(na.action='na.pass')
         modelmatrix <- model.matrix(~ toExpand + 0, mdata)[,-1]
         covars <- cbind(covars, modelmatrix)
       }else{ # 2 levels? just bind the trait as covar
-        verbose_print('Binding covars to covars\n')
-        covars <- cbind(covars, covars[,name])
+        covars <- cbind(covars, this_covar)
       }
     }
   }
@@ -236,10 +244,12 @@ if (!is.null(opt$control)) {
 }
 
 if (!is.null(opt$pairscan)) {
+  verbose_print("Running scantwo")
   scan_func <- function(...){
     scantwo(...)
   }
 } else {
+  verbose_print("Running scanone")
   scan_func <- function(...){
     scanone(...)
   }
diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R
index b0d25a9..d368013 100644
--- a/scripts/wgcna_analysis.R
+++ b/scripts/wgcna_analysis.R
@@ -25,6 +25,7 @@ if (length(args)==0) {
 inputData <- fromJSON(file = json_file_path)
 imgDir = inputData$TMPDIR
 
+inputData
 
 trait_sample_data <- do.call(rbind, inputData$trait_sample_data)
 
@@ -51,10 +52,8 @@ names(dataExpr) = inputData$trait_names
 # Allow multi-threading within WGCNA
 enableWGCNAThreads()
 
-# choose softthreshhold (Calculate soft threshold)
-# xtodo allow users to pass args
-
-powers <- c(c(1:10), seq(from = 12, to=20, by=2)) 
+# powers <- c(c(1:10), seq(from = 12, to=20, by=2)) 
+powers <- unlist(c(inputData$SoftThresholds))
 sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5)
 
 # check the power estimate