diff options
-rw-r--r-- | gn3/api/wgcna.py | 25 | ||||
-rw-r--r-- | gn3/app.py | 2 | ||||
-rw-r--r-- | gn3/computations/wgcna.py | 51 | ||||
-rw-r--r-- | gn3/settings.py | 2 | ||||
-rw-r--r-- | guix.scm | 3 | ||||
-rw-r--r-- | scripts/output.json | 161 | ||||
-rw-r--r-- | scripts/wgcna_analysis.R | 115 | ||||
-rw-r--r-- | scripts/wgcna_test_data.csv | 9 | ||||
-rw-r--r-- | scripts/wgcna_test_data.json | 65 | ||||
-rw-r--r-- | tests/integration/test_wgcna.py | 73 | ||||
-rw-r--r-- | tests/unit/computations/test_wgcna.py | 160 |
11 files changed, 665 insertions, 1 deletions
diff --git a/gn3/api/wgcna.py b/gn3/api/wgcna.py new file mode 100644 index 0000000..fa044cf --- /dev/null +++ b/gn3/api/wgcna.py @@ -0,0 +1,25 @@ +"""endpoint to run wgcna analysis""" +from flask import Blueprint +from flask import request +from flask import current_app +from flask import jsonify + +from gn3.computations.wgcna import call_wgcna_script + +wgcna = Blueprint("wgcna", __name__) + + +@wgcna.route("/run_wgcna", methods=["POST"]) +def run_wgcna(): + """run wgcna:output should be a json with a the data""" + + wgcna_data = request.json + + wgcna_script = current_app.config["WGCNA_RSCRIPT"] + + results = call_wgcna_script(wgcna_script, wgcna_data) + + if results.get("data") is None: + return jsonify(results), 401 + + return jsonify(results), 200 @@ -13,6 +13,7 @@ from gn3.api.general import general from gn3.api.heatmaps import heatmaps from gn3.api.correlation import correlation from gn3.api.data_entry import data_entry +from gn3.api.wgcna import wgcna def create_app(config: Union[Dict, str, None] = None) -> Flask: """Create a new flask object""" @@ -42,4 +43,5 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.register_blueprint(heatmaps, url_prefix="/api/heatmaps") app.register_blueprint(correlation, url_prefix="/api/correlation") app.register_blueprint(data_entry, url_prefix="/api/dataentry") + app.register_blueprint(wgcna, url_prefix="/api/wgcna") return app diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py new file mode 100644 index 0000000..fd508fa --- /dev/null +++ b/gn3/computations/wgcna.py @@ -0,0 +1,51 @@ +"""module contains code to preprocess and call wgcna script""" + +import os +import json +import uuid +from gn3.settings import TMPDIR + +from gn3.commands import run_cmd + + +def dump_wgcna_data(request_data: dict): + """function to dump request data to json file""" + filename = f"{str(uuid.uuid4())}.json" + + temp_file_path = os.path.join(TMPDIR, filename) + + with open(temp_file_path, "w") as output_file: + json.dump(request_data, output_file) + + return temp_file_path + + +def compose_wgcna_cmd(rscript_path: str, temp_file_path: str): + """function to componse wgcna cmd""" + # (todo):issue relative paths to abs paths + cmd = f"Rscript ./scripts/{rscript_path} {temp_file_path}" + return cmd + + +def call_wgcna_script(rscript_path: str, request_data: dict): + """function to call wgcna script""" + generated_file = dump_wgcna_data(request_data) + cmd = compose_wgcna_cmd(rscript_path, generated_file) + + try: + + run_cmd_results = run_cmd(cmd) + + with open(generated_file, "r") as outputfile: + + if run_cmd_results["code"] != 0: + return run_cmd_results + return { + "data": json.load(outputfile), + **run_cmd_results + } + except FileNotFoundError: + # relook at handling errors gn3 + return { + "output": "output file not found" + } diff --git a/gn3/settings.py b/gn3/settings.py index 9d7bba3..150d96d 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -25,6 +25,8 @@ GN2_BASE_URL = "http://www.genenetwork.org/" # biweight script BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" +# wgcna script +WGCNA_RSCRIPT = "wgcna_analysis.R" # qtlreaper command REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT")) @@ -84,7 +84,6 @@ #:recursive? #t #:select? git-file?)) (propagated-inputs `(("coreutils" ,coreutils) - ("csvdiff" ,go-github-com-aswinkarthik-csvdiff) ("gemma-wrapper" ,gemma-wrapper) ("gunicorn" ,gunicorn) ("python" ,python-wrapper) @@ -104,6 +103,8 @@ ("r-optparse" ,r-optparse) ("r-qtl" ,r-qtl) ("r-stringi" ,r-stringi) + ("r-wgcna" ,r-wgcna) + ("r-rjson" ,r-rjson) ("python-plotly" ,python-plotly) ("python-pandas" ,python-pandas) ("rust-qtlreaper" ,rust-qtlreaper) diff --git a/scripts/output.json b/scripts/output.json new file mode 100644 index 0000000..9caf837 --- /dev/null +++ b/scripts/output.json @@ -0,0 +1,161 @@ +{ + "ModEigens":{ + "MEturquoise":[ + 0.0646677768085351, + 0.137200224277058, + 0.63451113720732, + -0.544002665501479, + -0.489487590361863, + 0.197111117570427 + ] + }, + "soft_threshold":{ + "Power":[ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 12, + 14, + 16, + 18, + 20 + ], + "SFT.R.sq":[ + 0.0181093215847335, + 0.00011984142485113, + 0.000171967046945159, + 0.0105462010616537, + 0.0341444584348834, + 0.0687163726151286, + 0.0306423506274298, + 0.0492567394226327, + 0.0789539269997996, + 0.0944880158122527, + 0.122195040270446, + 0.0340768567186139, + 0.0625860126119281, + 0.100082257137014, + 0.128277841930818 + ], + "slope":[ + 3.81011386139005, + -0.170826531149538, + 0.159161787390082, + -1.01047981107833, + -1.55943531024794, + -1.93420125756514, + -1.16799247295814, + -1.33414063070555, + -1.54984944650438, + -1.54715757057087, + -1.49931213589121, + -1.80460140377151, + -2.19055343583319, + -2.52135805259606, + -2.58599487577447 + ], + "truncated.R.sq":[ + 0.768989700952805, + 0.522025793450566, + 0.329341226670409, + 0.110265719555879, + 0.0195649645183151, + -0.0503253079741786, + 0.0507498358330625, + -0.0129255450167765, + -0.035717519210676, + -0.0807094793662766, + -0.0967803564932559, + 0.00172686282662393, + -0.0340811003657508, + -0.0390284600100592, + -0.0489269837827069 + ], + "mean.k.":[ + 4.20178789454309, + 3.44556816856968, + 2.98532344074325, + 2.65297323828966, + 2.39517682414009, + 2.18846935370095, + 2.01963258852289, + 1.87999059876872, + 1.76335304166057, + 1.66510080962817, + 1.51038968360321, + 1.39583176924843, + 1.30882729664563, + 1.24120316437299, + 1.18753154238216 + ], + "median.k.":[ + 4.65733789933094, + 4.13585224131512, + 3.75980713552836, + 3.43775457512904, + 3.15305040369031, + 2.89933881967523, + 2.67225531456956, + 2.46825611568646, + 2.28437024155601, + 2.118086531192, + 1.83011067501282, + 1.59073325345641, + 1.38991168639473, + 1.2201000051609, + 1.07553194658444 + ], + "max.k.":[ + 4.81522245318686, + 4.21987143583501, + 3.83876962723542, + 3.55526976885104, + 3.32904938849614, + 3.14312441404036, + 2.98828051379132, + 2.85837136671219, + 2.74879840851137, + 2.65594228759455, + 2.50929962297015, + 2.40113981571731, + 2.31993775805391, + 2.25792900175825, + 2.2098218130451 + ] + }, + "blockGenes":[ + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "net_colors":{ + "X1":"turquoise", + "X2":"turquoise", + "X3":"turquoise", + "X4":"turquoise", + "X5":"turquoise", + "X6":"turquoise", + "X7":"turquoise" + }, + "net_unmerged":{ + "X1":"turquoise", + "X2":"turquoise", + "X3":"turquoise", + "X4":"turquoise", + "X5":"turquoise", + "X6":"turquoise", + "X7":"turquoise" + }, + "imageLoc":"/WGCNAoutput_1uujpTIpC.png" +}
\ No newline at end of file diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R new file mode 100644 index 0000000..17b3537 --- /dev/null +++ b/scripts/wgcna_analysis.R @@ -0,0 +1,115 @@ + + +library(WGCNA); +library(stringi); +library(rjson) + +options(stringsAsFactors = FALSE); + +imgDir = Sys.getenv("GENERATED_IMAGE_DIR") +# load expression data **assumes from json files row(traits)(columns info+samples) +# pass the file_path as arg +# pass the file path to read json data + +args = commandArgs(trailingOnly=TRUE) + +if (length(args)==0) { + stop("Argument for the file location is required", call.=FALSE) +} else { + # default output file + json_file_path = args[1] +} + +inputData <- fromJSON(file = json_file_path) + + +trait_sample_data <- do.call(rbind, inputData$trait_sample_data) + +dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x)))) +# transform expressionData + +dataExpr <- data.frame(t(dataExpr)) +gsg = goodSamplesGenes(dataExpr, verbose = 3) + +if (!gsg$allOK) +{ +if (sum(!gsg$goodGenes)>0) +printFlush(paste("Removing genes:", paste(names(dataExpr)[!gsg$goodGenes], collapse = ", "))); +if (sum(!gsg$goodSamples)>0) +printFlush(paste("Removing samples:", paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ", "))); +# Remove the offending genes and samples from the data: +dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] +} + +## network constructions and modules + +names(dataExpr) = inputData$trait_names + +# Allow multi-threading within WGCNA +enableWGCNAThreads() + +# choose softthreshhold (Calculate soft threshold) +# xtodo allow users to pass args + +powers <- c(c(1:10), seq(from = 12, to=20, by=2)) +sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) + +# check the power estimate + +if (is.na(sft$powerEstimate)){ + powerEst = 1 +}else{ + powerEst = sft$powerEstimate +} + +# pass user options +network <- blockwiseModules(dataExpr, + #similarity matrix options + corType = inputData$corType, + #adjacency matrix options + + power = powerEst, + networkType = "unsigned", + #TOM options + TOMtype = inputData$TOMtype, + + #module indentification + verbose = 3, + + minmodulesSize = inputData$minModuleSize, + deepSplit = 3, + PamRespectsDendro = FALSE + ) + + + +genImageRandStr <- function(prefix){ + + randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") + + return(paste(randStr,".png",sep="")) +} + +mergedColors <- labels2colors(network$colors) + +imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) +png(imageLoc,width=1000,height=600,type='cairo-png') + +plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], +"Module colors", +dendroLabels = FALSE, hang = 0.03, +addGuide = TRUE, guideHang = 0.05) + + +json_data <- list(input = inputData, + output = list(ModEigens=network$MEs, + soft_threshold=sft$fitIndices, + blockGenes =network$blockGenes[[1]], + net_colors =network$colors, + power_used_for_analy=powerEst, + net_unmerged=network$unmergedColors, + imageLoc=imageLoc)) + +json_data <- toJSON(json_data) + +write(json_data,file= json_file_path)
\ No newline at end of file diff --git a/scripts/wgcna_test_data.csv b/scripts/wgcna_test_data.csv new file mode 100644 index 0000000..8db9598 --- /dev/null +++ b/scripts/wgcna_test_data.csv @@ -0,0 +1,9 @@ +129S1/SvImJ,A/J,AKR/J,B6D2F1,BALB/cByJ,BALB/cJ
+7.142,7.31,7.49,6.899,7.172,7.396
+7.071,7.05,7.313,6.999,7.293,7.117
+7.221,7.246,7.754,6.866,6.752,7.269
+9.221,9.246,9.954,6.866,6.952,9.269
+7.221,7.246,7.754,6.866,6.752,7.269
+7.221,7.246,7.754,6.866,6.752,7.269
+11.221,11.246,11.1154,6.866,6.1152,11.269
+
diff --git a/scripts/wgcna_test_data.json b/scripts/wgcna_test_data.json new file mode 100644 index 0000000..1d469f4 --- /dev/null +++ b/scripts/wgcna_test_data.json @@ -0,0 +1,65 @@ +{ + "trait_names":["1455537_at","1425637_at","1449593_at","1421945_a_at","1450423_s_at","1423841_at","1451144_at"], + "trait_sample_data":[ + { + "129S1/SvImJ": 7.142, + "A/J": 7.31, + "AKR/J": 7.49, + "B6D2F1": 6.899, + "BALB/cByJ": 7.172, + "BALB/cJ": 7.396 + }, + { + "129S1/SvImJ": 7.071, + "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, + "BALB/cByJ": 7.293, + "BALB/cJ": 7.117 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 9.221, + "A/J": 9.246, + "AKR/J": 9.954, + "B6D2F1": 6.866, + "BALB/cByJ": 6.952, + "BALB/cJ": 9.269 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 11.221, + "A/J": 11.246, + "AKR/J": 11.1154, + "B6D2F1": 6.866, + "BALB/cByJ": 6.1152, + "BALB/cJ": 11.269 + } + ], + "TOMtype": "unsigned", + "minModuleSize": 30, + "corType": "pearson" + +} diff --git a/tests/integration/test_wgcna.py b/tests/integration/test_wgcna.py new file mode 100644 index 0000000..078449d --- /dev/null +++ b/tests/integration/test_wgcna.py @@ -0,0 +1,73 @@ +"""integration tests for wgcna""" + +from unittest import TestCase +from unittest import mock + +from gn3.app import create_app + + +class WgcnaIntegrationTest(TestCase): + """class contains wgcna integration tests""" + + def setUp(self): + self.app = create_app().test_client() + + @mock.patch("gn3.api.wgcna.call_wgcna_script") + def test_wgcna_endpoint(self, mock_wgcna_script): + """test /api/wgcna/run_wgcna endpoint""" + + wgcna_output_data = { + "code": 0, + "output": "run script successfully", + "data": { + "ModEigens": { + "MEturquoise": [ + 0.0646677768085351, + 0.137200224277058, + 0.63451113720732, + -0.544002665501479, + -0.489487590361863, + 0.197111117570427 + ] + }, + "net_colors": { + "X1": "turquoise", + "X2": "turquoise", + "X3": "turquoise", + "X4": "turquoise" + }, + "imageLoc": "/WGCNAoutput_1uujpTIpC.png" + } + } + + request_data = { + "trait_names": [ + "1455537_at", + "1425637_at" + ], + "trait_sample_data": [ + { + "129S1/SvImJ": 6.142, + "A/J": 5.31, + "AKR/J": 3.49, + "B6D2F1": 2.899, + "BALB/cByJ": 1.172, + "BALB/cJ": 7.396 + }, + { + "129S1/SvImJ": 1.42, + "A/J": 2.31, + "AKR/J": 5.49, + "B6D2F1": 3.899, + "BALB/cByJ": 1.172, + "BALB/cJ": 7.396 + } + ] + } + mock_wgcna_script.return_value = wgcna_output_data + + response = self.app.post("/api/wgcna/run_wgcna", + json=request_data, follow_redirects=True) + + self.assertEqual(response.status_code, 200) + self.assertEqual(response.get_json(), wgcna_output_data) diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py new file mode 100644 index 0000000..ec81d94 --- /dev/null +++ b/tests/unit/computations/test_wgcna.py @@ -0,0 +1,160 @@ +"""module contains python code for wgcna""" +from unittest import TestCase +from unittest import mock + +from gn3.computations.wgcna import dump_wgcna_data +from gn3.computations.wgcna import compose_wgcna_cmd +from gn3.computations.wgcna import call_wgcna_script + + +class TestWgcna(TestCase): + """test class for wgcna""" + + @mock.patch("gn3.computations.wgcna.run_cmd") + @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd") + @mock.patch("gn3.computations.wgcna.dump_wgcna_data") + def test_call_wgcna_script(self, + mock_dumping_data, + mock_compose_wgcna, + mock_run_cmd): + """test for calling wgcna script""" + + # pylint: disable = line-too-long + mock_dumping_data.return_value = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + mock_compose_wgcna.return_value = "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + request_data = { + "trait_names": ["1455537_at", "1425637_at", "1449593_at", "1421945_a_at", "1450423_s_at", "1423841_at", "1451144_at"], + "trait_sample_data": [ + { + "129S1/SvImJ": 7.142, + "A/J": 7.31, + "AKR/J": 7.49, + "B6D2F1": 6.899, + "BALB/cByJ": 7.172, + "BALB/cJ": 7.396 + }, + { + "129S1/SvImJ": 7.071, + "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, + "BALB/cByJ": 7.293, + "BALB/cJ": 7.117 + }]} + + mock_run_cmd_results = { + + "code": 0, + "output": "Flagging genes and samples with too many missing values...\n ..step 1\nAllowing parallel execution with up to 3 working processes.\npickSoftThreshold: will use block size 7.\n pickSoftThreshold: calculating connectivity for given powers...\n ..working on genes 1 through 7 of 7\n Flagging genes and samples with too many missing values...\n ..step 1\n ..Working on block 1 .\n TOM calculation: adjacency..\n ..will not use multithreading.\nclustering..\n ....detecting modules..\n ....calculating module eigengenes..\n ....checking kME in modules..\n ..merging modules that are too close..\n mergeCloseModules: Merging modules whose distance is less than 0.15\n mergeCloseModules: less than two proper modules.\n ..color levels are turquoise\n ..there is nothing to merge.\n Calculating new MEs...\n" + } + + json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"outputdata\":{\"eigengenes\":[],\"colors\":[]}}" + + expected_output = { + + "data": { + "inputdata": { + "trait_sample_data ": {}, + "minModuleSize": 30, + "TOMtype": "unsigned" + }, + + "outputdata": { + "eigengenes": [], + "colors": [] + } + }, + + **mock_run_cmd_results + + } + + with mock.patch("builtins.open", mock.mock_open(read_data=json_output)): + + mock_run_cmd.return_value = mock_run_cmd_results + + results = call_wgcna_script( + "Rscript/GUIX_PATH/scripts/r_file.R", request_data) + + mock_dumping_data.assert_called_once_with(request_data) + + mock_compose_wgcna.assert_called_once_with( + "Rscript/GUIX_PATH/scripts/r_file.R", + "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json") + + mock_run_cmd.assert_called_once_with( + "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json") + + self.assertEqual(results, expected_output) + + @mock.patch("gn3.computations.wgcna.run_cmd") + @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd") + @mock.patch("gn3.computations.wgcna.dump_wgcna_data") + def test_call_wgcna_script_fails(self, mock_dumping_data, mock_compose_wgcna, mock_run_cmd): + """test for calling wgcna script\ + fails and generates the expected error""" + # pylint: disable = line-too-long, + mock_dumping_data.return_value = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + mock_compose_wgcna.return_value = "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json" + + expected_error = { + "code": 2, + "output": "could not read the json file" + } + + with mock.patch("builtins.open", mock.mock_open(read_data="")): + + mock_run_cmd.return_value = expected_error + self.assertEqual(call_wgcna_script( + "input_file.R", ""), expected_error) + + def test_compose_wgcna_cmd(self): + """test for composing wgcna cmd""" + wgcna_cmd = compose_wgcna_cmd( + "wgcna.r", "/tmp/wgcna.json") + self.assertEqual( + wgcna_cmd, "Rscript ./scripts/wgcna.r /tmp/wgcna.json") + + @mock.patch("gn3.computations.wgcna.TMPDIR", "/tmp") + @mock.patch("gn3.computations.wgcna.uuid.uuid4") + def test_create_json_file(self, file_name_generator): + """test for writing the data to a csv file""" + # # All the traits we have data for (should not contain duplicates) + # All the strains we have data for (contains duplicates) + + trait_sample_data = {"1425642_at": {"129S1/SvImJ": 7.142, + "A/J": 7.31, "AKR/J": 7.49, + "B6D2F1": 6.899, "BALB/cByJ": 7.172, + "BALB/cJ": 7.396}, + "1457784_at": {"129S1/SvImJ": 7.071, "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, "BALB/cByJ": 7.293, + "BALB/cJ": 7.117}, + "1444351_at": {"129S1/SvImJ": 7.221, "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, "BALB/cByJ": 6.752, + "BALB/cJ": 7.269} + + } + + expected_input = { + "trait_sample_data": trait_sample_data, + "TOMtype": "unsigned", + "minModuleSize": 30 + } + + with mock.patch("builtins.open", mock.mock_open()) as file_handler: + + file_name_generator.return_value = "facb73ff-7eef-4053-b6ea-e91d3a22a00c" + + results = dump_wgcna_data( + expected_input) + + file_handler.assert_called_once_with( + "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w') + + self.assertEqual( + results, "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json") |