aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-10-25 15:13:05 +0300
committerFrederick Muriuki Muriithi2021-10-25 15:13:05 +0300
commit5a472ebab04c68cd5228f253cc98d0ae22a520d7 (patch)
tree3d6b1a5a8933896a6e7fdc98f473ef069accd348
parent0814eea6b57e45d4337424e63c164d204d03b64d (diff)
parent5440bfcd6940db08c4479a39ba66dbc802b2c426 (diff)
downloadgenenetwork3-5a472ebab04c68cd5228f253cc98d0ae22a520d7.tar.gz
Merge branch 'main' of github.com:genenetwork/genenetwork3 into partial-correlations
-rw-r--r--gn3/computations/wgcna.py49
-rw-r--r--guix.scm1
-rw-r--r--scripts/wgcna_analysis.R17
-rw-r--r--tests/unit/computations/test_wgcna.py14
4 files changed, 73 insertions, 8 deletions
diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py
index fd508fa..ab12fe7 100644
--- a/gn3/computations/wgcna.py
+++ b/gn3/computations/wgcna.py
@@ -3,8 +3,11 @@
import os
import json
import uuid
-from gn3.settings import TMPDIR
+import subprocess
+import base64
+
+from gn3.settings import TMPDIR
from gn3.commands import run_cmd
@@ -14,12 +17,46 @@ def dump_wgcna_data(request_data: dict):
temp_file_path = os.path.join(TMPDIR, filename)
+ request_data["TMPDIR"] = TMPDIR
+
with open(temp_file_path, "w") as output_file:
json.dump(request_data, output_file)
return temp_file_path
+def stream_cmd_output(socketio, request_data, cmd: str):
+ """Run `cmd` through the shell and relay its output line by line.
+
+ Every message is sent as an "output" event on namespace "/" to the
+ room named by request_data["socket_id"]. The child's stderr is merged
+ into its stdout, so both streams are relayed. Nothing is returned.
+ """
+ # xtodo syncing and closing /edge cases
+ # NOTE(review): the child process is never waited on and its pipe is never closed here -- confirm this is what the todo above covers
+ socketio.emit("output", {"data": f"calling you script {cmd}"},
+ namespace="/", room=request_data["socket_id"])
+ results = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)  # shell=True: cmd is interpreted by the shell, so it must be trusted
+ # only relay when a stdout pipe is actually attached to the child
+ if results.stdout is not None:
+ # readline yields b"" at EOF, which is the sentinel that ends iter()
+ for line in iter(results.stdout.readline, b""):
+ socketio.emit("output",
+ {"data": line.decode("utf-8").rstrip()},
+ namespace="/", room=request_data["socket_id"])
+ # status message sent after the output lines
+ socketio.emit(
+ "output", {"data":
+ "parsing the output results"}, namespace="/",
+ room=request_data["socket_id"])
+
+
+def process_image(image_loc: str) -> bytes:
+ """Return the base64 encoding of the file at `image_loc`.
+
+ The file is read in binary mode. If it does not exist, b"" is
+ returned instead of raising.
+ """
+ # a missing file is reported as empty bytes rather than as an error
+ try:
+ with open(image_loc, "rb") as image_file:
+ return base64.b64encode(image_file.read())
+ except FileNotFoundError:
+ return b""
+
+
def compose_wgcna_cmd(rscript_path: str, temp_file_path: str):
"""function to compose wgcna cmd"""
# (todo):issue relative paths to abs paths
@@ -32,6 +69,8 @@ def call_wgcna_script(rscript_path: str, request_data: dict):
generated_file = dump_wgcna_data(request_data)
cmd = compose_wgcna_cmd(rscript_path, generated_file)
+ # stream_cmd_output(request_data, cmd) disable streaming of data
+
try:
run_cmd_results = run_cmd(cmd)
@@ -40,8 +79,14 @@ def call_wgcna_script(rscript_path: str, request_data: dict):
if run_cmd_results["code"] != 0:
return run_cmd_results
+
+ output_file_data = json.load(outputfile)
+ output_file_data["output"]["image_data"] = process_image(
+ output_file_data["output"]["imageLoc"]).decode("ascii")
+ # JSON can only carry text, so the base64 bytes are decoded to an ASCII string; base64-decode "image_data" to recover the image
+
return {
- "data": json.load(outputfile),
+ "data": output_file_data,
**run_cmd_results
}
except FileNotFoundError:
diff --git a/guix.scm b/guix.scm
index 9b8f399..d8b1596 100644
--- a/guix.scm
+++ b/guix.scm
@@ -100,6 +100,7 @@
("python-redis" ,python-redis)
("python-requests" ,python-requests)
("python-scipy" ,python-scipy)
+ ("python-flask-socketio" ,python-flask-socketio)
("python-sqlalchemy-stubs"
,python-sqlalchemy-stubs)
("r-optparse" ,r-optparse)
diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R
index 17b3537..b0d25a9 100644
--- a/scripts/wgcna_analysis.R
+++ b/scripts/wgcna_analysis.R
@@ -6,11 +6,13 @@ library(rjson)
options(stringsAsFactors = FALSE);
-imgDir = Sys.getenv("GENERATED_IMAGE_DIR")
+cat("Running the wgcna analysis script\n")
+
# load expression data **assumes from json files row(traits)(columns info+samples)
# pass the file_path as arg
# pass the file path to read json data
+
args = commandArgs(trailingOnly=TRUE)
if (length(args)==0) {
@@ -21,6 +23,7 @@ if (length(args)==0) {
}
inputData <- fromJSON(file = json_file_path)
+imgDir = inputData$TMPDIR
trait_sample_data <- do.call(rbind, inputData$trait_sample_data)
@@ -83,6 +86,11 @@ network <- blockwiseModules(dataExpr,
+cat("Generated network \n")
+
+network
+
+
genImageRandStr <- function(prefix){
randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
@@ -90,14 +98,19 @@ genImageRandStr <- function(prefix){
return(paste(randStr,".png",sep=""))
}
+
mergedColors <- labels2colors(network$colors)
imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput"))
png(imageLoc,width=1000,height=600,type='cairo-png')
+
+cat("Generating the CLuster dendrogram\n")
+
+
plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]],
"Module colors",
-dendroLabels = FALSE, hang = 0.03,
+dendroLabels = NULL, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py
index ec81d94..5f23a86 100644
--- a/tests/unit/computations/test_wgcna.py
+++ b/tests/unit/computations/test_wgcna.py
@@ -10,13 +10,16 @@ from gn3.computations.wgcna import call_wgcna_script
class TestWgcna(TestCase):
"""test class for wgcna"""
+ @mock.patch("gn3.computations.wgcna.process_image")
@mock.patch("gn3.computations.wgcna.run_cmd")
@mock.patch("gn3.computations.wgcna.compose_wgcna_cmd")
@mock.patch("gn3.computations.wgcna.dump_wgcna_data")
def test_call_wgcna_script(self,
mock_dumping_data,
mock_compose_wgcna,
- mock_run_cmd):
+ mock_run_cmd,
+ mock_img,
+ ):
"""test for calling wgcna script"""
# pylint: disable = line-too-long
@@ -50,7 +53,7 @@ class TestWgcna(TestCase):
"output": "Flagging genes and samples with too many missing values...\n ..step 1\nAllowing parallel execution with up to 3 working processes.\npickSoftThreshold: will use block size 7.\n pickSoftThreshold: calculating connectivity for given powers...\n ..working on genes 1 through 7 of 7\n Flagging genes and samples with too many missing values...\n ..step 1\n ..Working on block 1 .\n TOM calculation: adjacency..\n ..will not use multithreading.\nclustering..\n ....detecting modules..\n ....calculating module eigengenes..\n ....checking kME in modules..\n ..merging modules that are too close..\n mergeCloseModules: Merging modules whose distance is less than 0.15\n mergeCloseModules: less than two proper modules.\n ..color levels are turquoise\n ..there is nothing to merge.\n Calculating new MEs...\n"
}
- json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"outputdata\":{\"eigengenes\":[],\"colors\":[]}}"
+ json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"output\":{\"eigengenes\":[],\"imageLoc\":[],\"colors\":[]}}"
expected_output = {
@@ -61,9 +64,11 @@ class TestWgcna(TestCase):
"TOMtype": "unsigned"
},
- "outputdata": {
+ "output": {
"eigengenes": [],
- "colors": []
+ "imageLoc": [],
+ "colors": [],
+ "image_data": "AFDSFNBSDGJJHH"
}
},
@@ -74,6 +79,7 @@ class TestWgcna(TestCase):
with mock.patch("builtins.open", mock.mock_open(read_data=json_output)):
mock_run_cmd.return_value = mock_run_cmd_results
+ mock_img.return_value = b"AFDSFNBSDGJJHH"
results = call_wgcna_script(
"Rscript/GUIX_PATH/scripts/r_file.R", request_data)