diff options
author | Frederick Muriuki Muriithi | 2021-10-25 15:13:05 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2021-10-25 15:13:05 +0300 |
commit | 5a472ebab04c68cd5228f253cc98d0ae22a520d7 (patch) | |
tree | 3d6b1a5a8933896a6e7fdc98f473ef069accd348 | |
parent | 0814eea6b57e45d4337424e63c164d204d03b64d (diff) | |
parent | 5440bfcd6940db08c4479a39ba66dbc802b2c426 (diff) | |
download | genenetwork3-5a472ebab04c68cd5228f253cc98d0ae22a520d7.tar.gz |
Merge branch 'main' of github.com:genenetwork/genenetwork3 into partial-correlations
-rw-r--r-- | gn3/computations/wgcna.py | 49 | ||||
-rw-r--r-- | guix.scm | 1 | ||||
-rw-r--r-- | scripts/wgcna_analysis.R | 17 | ||||
-rw-r--r-- | tests/unit/computations/test_wgcna.py | 14 |
4 files changed, 73 insertions, 8 deletions
diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py index fd508fa..ab12fe7 100644 --- a/gn3/computations/wgcna.py +++ b/gn3/computations/wgcna.py @@ -3,8 +3,11 @@ import os import json import uuid -from gn3.settings import TMPDIR +import subprocess +import base64 + +from gn3.settings import TMPDIR from gn3.commands import run_cmd @@ -14,12 +17,46 @@ def dump_wgcna_data(request_data: dict): temp_file_path = os.path.join(TMPDIR, filename) + request_data["TMPDIR"] = TMPDIR + with open(temp_file_path, "w") as output_file: json.dump(request_data, output_file) return temp_file_path +def stream_cmd_output(socketio, request_data, cmd: str): + """function to stream in realtime""" + # xtodo syncing and closing /edge cases + + socketio.emit("output", {"data": f"calling you script {cmd}"}, + namespace="/", room=request_data["socket_id"]) + results = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) + + if results.stdout is not None: + + for line in iter(results.stdout.readline, b""): + socketio.emit("output", + {"data": line.decode("utf-8").rstrip()}, + namespace="/", room=request_data["socket_id"]) + + socketio.emit( + "output", {"data": + "parsing the output results"}, namespace="/", + room=request_data["socket_id"]) + + +def process_image(image_loc: str) -> bytes: + """encode the image""" + + try: + with open(image_loc, "rb") as image_file: + return base64.b64encode(image_file.read()) + except FileNotFoundError: + return b"" + + def compose_wgcna_cmd(rscript_path: str, temp_file_path: str): """function to componse wgcna cmd""" # (todo):issue relative paths to abs paths @@ -32,6 +69,8 @@ def call_wgcna_script(rscript_path: str, request_data: dict): generated_file = dump_wgcna_data(request_data) cmd = compose_wgcna_cmd(rscript_path, generated_file) + # stream_cmd_output(request_data, cmd) disable streaming of data + try: run_cmd_results = run_cmd(cmd) @@ -40,8 +79,14 @@ def call_wgcna_script(rscript_path: str, request_data: dict): if run_cmd_results["code"] != 0: return run_cmd_results + + output_file_data = json.load(outputfile) + output_file_data["output"]["image_data"] = process_image( + output_file_data["output"]["imageLoc"]).decode("ascii") + # json format only supports unicode string// to get image data reconvert + return { - "data": json.load(outputfile), + "data": output_file_data, **run_cmd_results } except FileNotFoundError: @@ -100,6 +100,7 @@ ("python-redis" ,python-redis) ("python-requests" ,python-requests) ("python-scipy" ,python-scipy) + ("python-flask-socketio" ,python-flask-socketio) ("python-sqlalchemy-stubs" ,python-sqlalchemy-stubs) ("r-optparse" ,r-optparse) diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 17b3537..b0d25a9 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -6,11 +6,13 @@ library(rjson) options(stringsAsFactors = FALSE); -imgDir = Sys.getenv("GENERATED_IMAGE_DIR") +cat("Running the wgcna analysis script\n") + # load expression data **assumes from json files row(traits)(columns info+samples) # pass the file_path as arg # pass the file path to read json data + args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -21,6 +23,7 @@ if (length(args)==0) { } inputData <- fromJSON(file = json_file_path) +imgDir = inputData$TMPDIR trait_sample_data <- do.call(rbind, inputData$trait_sample_data) @@ -83,6 +86,11 @@ network <- blockwiseModules(dataExpr, +cat("Generated network \n") + +network + + genImageRandStr <- function(prefix){ randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") @@ -90,14 +98,19 @@ genImageRandStr <- function(prefix){ return(paste(randStr,".png",sep="")) } + mergedColors <- labels2colors(network$colors) imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) png(imageLoc,width=1000,height=600,type='cairo-png') + +cat("Generating the CLuster dendrogram\n") + + plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], "Module colors", -dendroLabels = FALSE, hang = 0.03, +dendroLabels = NULL, hang = 0.03, addGuide = TRUE, guideHang = 0.05) diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py index ec81d94..5f23a86 100644 --- a/tests/unit/computations/test_wgcna.py +++ b/tests/unit/computations/test_wgcna.py @@ -10,13 +10,16 @@ from gn3.computations.wgcna import call_wgcna_script class TestWgcna(TestCase): """test class for wgcna""" + @mock.patch("gn3.computations.wgcna.process_image") @mock.patch("gn3.computations.wgcna.run_cmd") @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd") @mock.patch("gn3.computations.wgcna.dump_wgcna_data") def test_call_wgcna_script(self, mock_dumping_data, mock_compose_wgcna, - mock_run_cmd): + mock_run_cmd, + mock_img, + ): """test for calling wgcna script""" # pylint: disable = line-too-long @@ -50,7 +53,7 @@ class TestWgcna(TestCase): "output": "Flagging genes and samples with too many missing values...\n ..step 1\nAllowing parallel execution with up to 3 working processes.\npickSoftThreshold: will use block size 7.\n pickSoftThreshold: calculating connectivity for given powers...\n ..working on genes 1 through 7 of 7\n Flagging genes and samples with too many missing values...\n ..step 1\n ..Working on block 1 .\n TOM calculation: adjacency..\n ..will not use multithreading.\nclustering..\n ....detecting modules..\n ....calculating module eigengenes..\n ....checking kME in modules..\n ..merging modules that are too close..\n mergeCloseModules: Merging modules whose distance is less than 0.15\n mergeCloseModules: less than two proper modules.\n ..color levels are turquoise\n ..there is nothing to merge.\n Calculating new MEs...\n" } - json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"outputdata\":{\"eigengenes\":[],\"colors\":[]}}" + json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"output\":{\"eigengenes\":[],\"imageLoc\":[],\"colors\":[]}}" expected_output = { @@ -61,9 +64,11 @@ class TestWgcna(TestCase): "TOMtype": "unsigned" }, - "outputdata": { + "output": { "eigengenes": [], - "colors": [] + "imageLoc": [], + "colors": [], + "image_data": "AFDSFNBSDGJJHH" } }, @@ -74,6 +79,7 @@ class TestWgcna(TestCase): with mock.patch("builtins.open", mock.mock_open(read_data=json_output)): mock_run_cmd.return_value = mock_run_cmd_results + mock_img.return_value = b"AFDSFNBSDGJJHH" results = call_wgcna_script( "Rscript/GUIX_PATH/scripts/r_file.R", request_data) |