From 1600992807a0b3edbe10e8c0baf80a41636a7650 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 21:58:20 +0300 Subject: init commit for wgcna script --- scripts/wgcna_analysis.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 scripts/wgcna_analysis.R (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3 From ea0e92d3f63a9f403aacd5ab1590f61f2752158a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 22:12:43 +0300 Subject: load the required data for analysis --- scripts/wgcna_analysis.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index e69de29..a8170b6 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -0,0 +1,21 @@ +# initial workspace setup + +library(WGCNA); +stringsAsFactors = FALSE + +# load expression data **assumes csv format row(traits)(columns info+samples) + + +wgcnaRawData <- read.csv(file = "wgcna_data.csv") + +# transform expressionData + +datExpr <- as.data.frame(t(wgcnaRawData)); + + + + + + + + -- cgit v1.2.3 From 49fdd614d4b18f7d28126e4ebbf3adca57f0416f Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 22:31:18 +0300 Subject: Checking data for excessive missing values --- scripts/wgcna_analysis.R | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index a8170b6..8e90d7d 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -10,10 +10,28 @@ wgcnaRawData <- read.csv(file = "wgcna_data.csv") # transform expressionData -datExpr <- as.data.frame(t(wgcnaRawData)); +dataExpr <- as.data.frame(t(wgcnaRawData)); +# data cleaning + +# adopted from docs +gsg = goodSamplesGenes(dataExpr, verbose = 3); + + + +if (!gsg$allOK) +{ +# Optionally, print the gene and sample names that were removed: +if (sum(!gsg$goodGenes)>0) +printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", "))); +if (sum(!gsg$goodSamples)>0) +printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", "))); +# Remove the offending genes and samples from the data: +dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] +} + -- cgit v1.2.3 From 42c120dff2b3cac8a6b6546ebab9daf021aac11a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 22:42:43 +0300 Subject: compute the softthreshhold --- scripts/wgcna_analysis.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 8e90d7d..cb93492 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -5,15 +5,12 @@ stringsAsFactors = FALSE # load expression data **assumes csv format row(traits)(columns info+samples) - wgcnaRawData <- read.csv(file = "wgcna_data.csv") # transform expressionData dataExpr <- as.data.frame(t(wgcnaRawData)); - - # data cleaning # adopted from docs @@ -32,6 +29,17 @@ printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples] dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] } +# network constructions and modules + +# choose softthreshhold (Calculate soft threshold if the user specified the) + +powers = c(c(1:10), seq(from = 12, to=20, by=2)) +sft = pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) + + + + + -- cgit v1.2.3 From 1c98a10bf5015a85b856f9e937417d51ec05d781 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 22:49:43 +0300 Subject: construct gene co-expression network & module detection --- scripts/wgcna_analysis.R | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index cb93492..29f0259 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -33,9 +33,26 @@ dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] # choose softthreshhold (Calculate soft threshold if the user specified the) -powers = c(c(1:10), seq(from = 12, to=20, by=2)) -sft = pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) - +powers <- c(c(1:10), seq(from = 12, to=20, by=2)) +sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) + +# pass user options +network <- blockwiseModules(dataExpr, + #similarity matrix options + corType = "pearson", + #adjacency matrix options + + power = sft$powerEstimate, + networkType = "unsigned", + #TOM options + TOMtype = "unsigned", + + #module indentification + + minmodulesSize = 30, + deepSplit = 5, + PamRespectsDendro = FALSE + ) -- cgit v1.2.3 From b32f4ee074d55d8ab78863d4c5acc652ec9cd839 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 23:18:18 +0300 Subject: plot plotDendroAndColors and generate png --- scripts/wgcna_analysis.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 29f0259..65ff36e 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -54,6 +54,24 @@ network <- blockwiseModules(dataExpr, PamRespectsDendro = FALSE ) +# plot dendro add color + +# Convert labels to colors for plotting +mergedColors = labels2colors(net$colors) +# Plot the dendrogram and the module colors underneath + + +# generate random name for png && save the image location + + + +png("WGCNAoutput.png",width=1000,height=600,type='cairo-png') + +plotDendroAndColors(network$dendrograms[[1]],mergedColors[net$blockGenes[[1]]], +"Module colors", +dendroLabels = FALSE, hang = 0.03, +addGuide = TRUE, guideHang = 0.05) + -- cgit v1.2.3 From 49b852baa46c0f06caa2f0621b18d521cf334483 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 23:40:17 +0300 Subject: function to generate rand str for image --- scripts/wgcna_analysis.R | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 65ff36e..efe0336 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -1,7 +1,9 @@ # initial workspace setup library(WGCNA); -stringsAsFactors = FALSE +library(stringi); + +options(stringsAsFactors = FALSE); # load expression data **assumes csv format row(traits)(columns info+samples) @@ -64,8 +66,18 @@ mergedColors = labels2colors(net$colors) # generate random name for png && save the image location +genImageRandStr <- function(prefix){ + + randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") + + return(paste(randStr,".png",sep="")) +} + + + +png(genImageRandStr,width=1000,height=600,type='cairo-png') + -png("WGCNAoutput.png",width=1000,height=600,type='cairo-png') plotDendroAndColors(network$dendrograms[[1]],mergedColors[net$blockGenes[[1]]], "Module colors", -- cgit v1.2.3 From 56092341abe9579b995ff6105722154183b31d22 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 14 Sep 2021 23:41:46 +0300 Subject: remove debug statements --- scripts/wgcna_analysis.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index efe0336..16a44fd 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -56,14 +56,6 @@ network <- blockwiseModules(dataExpr, PamRespectsDendro = FALSE ) -# plot dendro add color - -# Convert labels to colors for plotting -mergedColors = labels2colors(net$colors) -# Plot the dendrogram and the module colors underneath - - -# generate random name for png && save the image location genImageRandStr <- function(prefix){ @@ -74,6 +66,7 @@ genImageRandStr <- function(prefix){ } +mergedColors = labels2colors(net$colors) png(genImageRandStr,width=1000,height=600,type='cairo-png') -- cgit v1.2.3 From e0f25dedea08842820424ced51af9af0c7eaab4b Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 15 Sep 2021 02:13:03 +0300 Subject: Fetch IMAGE_DIR env and add img location --- scripts/wgcna_analysis.R | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 16a44fd..54650df 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -1,10 +1,14 @@ # initial workspace setup + +# todo pass required input data here library(WGCNA); library(stringi); options(stringsAsFactors = FALSE); +imgDir = Sys.getenv("GENERATED_IMAGE_DIR") + # load expression data **assumes csv format row(traits)(columns info+samples) wgcnaRawData <- read.csv(file = "wgcna_data.csv") @@ -33,7 +37,7 @@ dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] # network constructions and modules -# choose softthreshhold (Calculate soft threshold if the user specified the) +# choose softthreshhold (Calculate soft threshold) powers <- c(c(1:10), seq(from = 12, to=20, by=2)) sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) @@ -65,12 +69,12 @@ genImageRandStr <- function(prefix){ return(paste(randStr,".png",sep="")) } +mergedColors <- labels2colors(net$colors) -mergedColors = labels2colors(net$colors) - -png(genImageRandStr,width=1000,height=600,type='cairo-png') +imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) +png(imageLoc,width=1000,height=600,type='cairo-png') plotDendroAndColors(network$dendrograms[[1]],mergedColors[net$blockGenes[[1]]], "Module colors", @@ -79,9 +83,3 @@ addGuide = TRUE, guideHang = 0.05) - - - - - - -- cgit v1.2.3 From 94e2e79141cbddb48456fb5707eb0e4e36e97d3b Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 15 Sep 2021 02:19:03 +0300 Subject: rename variables && delete debugs --- scripts/wgcna_analysis.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 54650df..390bee4 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -1,7 +1,3 @@ -# initial workspace setup - - -# todo pass required input data here library(WGCNA); library(stringi); @@ -11,15 +7,14 @@ imgDir = Sys.getenv("GENERATED_IMAGE_DIR") # load expression data **assumes csv format row(traits)(columns info+samples) -wgcnaRawData <- read.csv(file = "wgcna_data.csv") +inputData <- read.csv(file = "wgcna_data.csv") # transform expressionData -dataExpr <- as.data.frame(t(wgcnaRawData)); +dataExpr <- as.data.frame(t(inputData)); # data cleaning -# adopted from docs gsg = goodSamplesGenes(dataExpr, verbose = 3); -- cgit v1.2.3 From cee10bacd4316eb807c36b8a11e84f9be5945f44 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 15 Sep 2021 04:34:26 +0300 Subject: minor fixes --- scripts/wgcna_analysis.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 390bee4..267cd86 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -1,3 +1,5 @@ + + library(WGCNA); library(stringi); @@ -6,6 +8,7 @@ options(stringsAsFactors = FALSE); imgDir = Sys.getenv("GENERATED_IMAGE_DIR") # load expression data **assumes csv format row(traits)(columns info+samples) +# pass the file_path as arg inputData <- read.csv(file = "wgcna_data.csv") @@ -13,26 +16,29 @@ inputData <- read.csv(file = "wgcna_data.csv") dataExpr <- as.data.frame(t(inputData)); -# data cleaning +## data cleaning gsg = goodSamplesGenes(dataExpr, verbose = 3); - +# https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/ if (!gsg$allOK) { -# Optionally, print the gene and sample names that were removed: if (sum(!gsg$goodGenes)>0) -printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", "))); +printFlush(paste("Removing genes:", paste(names(dataExpr)[!gsg$goodGenes], collapse = ", "))); if (sum(!gsg$goodSamples)>0) -printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", "))); +printFlush(paste("Removing samples:", paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ", "))); # Remove the offending genes and samples from the data: dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] } -# network constructions and modules +## network constructions and modules + +# Allow multi-threading within WGCNA +enableWGCNAThreads() # choose softthreshhold (Calculate soft threshold) +# xtodo allow users to pass args powers <- c(c(1:10), seq(from = 12, to=20, by=2)) sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) -- cgit v1.2.3 From 3d36fe96c94cebb6e7ea93b735148b25c4b95f6d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 12:22:10 +0300 Subject: load data from json file and and convert to dt --- scripts/wgcna_analysis.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 267cd86..d0ba91a 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -2,26 +2,29 @@ library(WGCNA); library(stringi); +library(rjson) options(stringsAsFactors = FALSE); imgDir = Sys.getenv("GENERATED_IMAGE_DIR") -# load expression data **assumes csv format row(traits)(columns info+samples) +# load expression data **assumes from json files row(traits)(columns info+samples) # pass the file_path as arg -inputData <- read.csv(file = "wgcna_data.csv") +results <- fromJSON(file = "file_path.json") -# transform expressionData +# trait_sample_data <- results$trait_sample_data +trait_sample_data <- do.call(rbind, results$trait_sample_data) -dataExpr <- as.data.frame(t(inputData)); -## data cleaning +dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x)))) +# trait_sample_data <- as.data.frame(t(results$trait_sample_data)) +# transform expressionData +dataExpr <- data.frame(t(dataExpr)) gsg = goodSamplesGenes(dataExpr, verbose = 3); # https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/ - if (!gsg$allOK) { if (sum(!gsg$goodGenes)>0) @@ -49,7 +52,7 @@ network <- blockwiseModules(dataExpr, corType = "pearson", #adjacency matrix options - power = sft$powerEstimate, + power = 5, networkType = "unsigned", #TOM options TOMtype = "unsigned", @@ -70,14 +73,13 @@ genImageRandStr <- function(prefix){ return(paste(randStr,".png",sep="")) } -mergedColors <- labels2colors(net$colors) +mergedColors <- labels2colors(network$colors) imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) - png(imageLoc,width=1000,height=600,type='cairo-png') -plotDendroAndColors(network$dendrograms[[1]],mergedColors[net$blockGenes[[1]]], +plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], "Module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) -- cgit v1.2.3 From 95620e1aef5a9c56875845769d58d2aab20dacca Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 12:37:51 +0300 Subject: pass other variables from user input for network constr --- scripts/wgcna_analysis.R | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index d0ba91a..73d0e3f 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -13,12 +13,19 @@ imgDir = Sys.getenv("GENERATED_IMAGE_DIR") results <- fromJSON(file = "file_path.json") -# trait_sample_data <- results$trait_sample_data -trait_sample_data <- do.call(rbind, results$trait_sample_data) +# parse the json data input + +minModuleSize <-results$minModuleSize + +TOMtype <-results$TOMtype + +corType <-results$corType +# + +trait_sample_data <- do.call(rbind, results$trait_sample_data) dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x)))) -# trait_sample_data <- as.data.frame(t(results$trait_sample_data)) # transform expressionData dataExpr <- data.frame(t(dataExpr)) @@ -49,18 +56,18 @@ sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) # pass user options network <- blockwiseModules(dataExpr, #similarity matrix options - corType = "pearson", + corType = corType, #adjacency matrix options power = 5, networkType = "unsigned", #TOM options - TOMtype = "unsigned", + TOMtype = TOMtype, #module indentification - minmodulesSize = 30, - deepSplit = 5, + minmodulesSize = minModuleSize, + deepSplit = 3, PamRespectsDendro = FALSE ) @@ -76,7 +83,7 @@ genImageRandStr <- function(prefix){ mergedColors <- labels2colors(network$colors) imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) - +imageLoc png(imageLoc,width=1000,height=600,type='cairo-png') plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], -- cgit v1.2.3 From 4a8e40f7331f61ba8b7038e8ad48f86018ac7dc6 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 12:47:27 +0300 Subject: pass json file path as an arg --- scripts/wgcna_analysis.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 73d0e3f..53b59d5 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -7,11 +7,20 @@ library(rjson) options(stringsAsFactors = FALSE); imgDir = Sys.getenv("GENERATED_IMAGE_DIR") - # load expression data **assumes from json files row(traits)(columns info+samples) # pass the file_path as arg +# pass the file path to read json data + +args = commandArgs(trailingOnly=TRUE) + +if (length(args)==0) { + stop("Argument for the file location is requires", call.=FALSE) +} else { + # default output file + json_file_path = args[1] +} -results <- fromJSON(file = "file_path.json") +results <- fromJSON(file = json_file_path) # parse the json data input -- cgit v1.2.3 From 655343aded3ece533f267bd9fd16aadf0cefff02 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 12:49:40 +0300 Subject: add mock test data for script --- scripts/wgcna_test_data.csv | 9 +++++++ scripts/wgcna_test_data.json | 64 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 scripts/wgcna_test_data.csv create mode 100644 scripts/wgcna_test_data.json (limited to 'scripts') diff --git a/scripts/wgcna_test_data.csv b/scripts/wgcna_test_data.csv new file mode 100644 index 0000000..8db9598 --- /dev/null +++ b/scripts/wgcna_test_data.csv @@ -0,0 +1,9 @@ +129S1/SvImJ,A/J,AKR/J,B6D2F1,BALB/cByJ,BALB/cJ +7.142,7.31,7.49,6.899,7.172,7.396 +7.071,7.05,7.313,6.999,7.293,7.117 +7.221,7.246,7.754,6.866,6.752,7.269 +9.221,9.246,9.954,6.866,6.952,9.269 +7.221,7.246,7.754,6.866,6.752,7.269 +7.221,7.246,7.754,6.866,6.752,7.269 +11.221,11.246,11.1154,6.866,6.1152,11.269 + diff --git a/scripts/wgcna_test_data.json b/scripts/wgcna_test_data.json new file mode 100644 index 0000000..7348b4e --- /dev/null +++ b/scripts/wgcna_test_data.json @@ -0,0 +1,64 @@ +{ + "trait_sample_data":[ + { + "129S1/SvImJ": 7.142, + "A/J": 7.31, + "AKR/J": 7.49, + "B6D2F1": 6.899, + "BALB/cByJ": 7.172, + "BALB/cJ": 7.396 + }, + { + "129S1/SvImJ": 7.071, + "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, + "BALB/cByJ": 7.293, + "BALB/cJ": 7.117 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 9.221, + "A/J": 9.246, + "AKR/J": 9.954, + "B6D2F1": 6.866, + "BALB/cByJ": 6.952, + "BALB/cJ": 9.269 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 11.221, + "A/J": 11.246, + "AKR/J": 11.1154, + "B6D2F1": 6.866, + "BALB/cByJ": 6.1152, + "BALB/cJ": 11.269 + } + ], + "TOMtype": "unsigned", + "minModuleSize": 30, + "corType": "pearson" + +} \ No newline at end of file -- cgit v1.2.3 From 9216b6ae956b5c78a9b6d21dbd40b6df1e111264 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 14:22:14 +0300 Subject: validate required output --- scripts/wgcna_analysis.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 53b59d5..86ddffb 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -14,7 +14,7 @@ imgDir = Sys.getenv("GENERATED_IMAGE_DIR") args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { - stop("Argument for the file location is requires", call.=FALSE) + stop("Argument for the file location is required", call.=FALSE) } else { # default output file json_file_path = args[1] @@ -92,7 +92,6 @@ genImageRandStr <- function(prefix){ mergedColors <- labels2colors(network$colors) imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) -imageLoc png(imageLoc,width=1000,height=600,type='cairo-png') plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], @@ -102,3 +101,13 @@ addGuide = TRUE, guideHang = 0.05) + +json_data <- list(ModEigens=network$MEs,soft_threshold=sft$fitIndices, + blockGenes =network$blockGenes[[1]], + net_colors =network$colors, + net_unmerged=network$unmergedColors, + imageLoc=imageLoc) + +json_data <- toJSON(json_data) + +write(json_data,"./output.json") \ No newline at end of file -- cgit v1.2.3 From eb2277a5fc96795bf6432e392e62601a3cf94058 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 14:22:31 +0300 Subject: sample output data --- scripts/output.json | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 scripts/output.json (limited to 'scripts') diff --git a/scripts/output.json b/scripts/output.json new file mode 100644 index 0000000..9caf837 --- /dev/null +++ b/scripts/output.json @@ -0,0 +1,161 @@ +{ + "ModEigens":{ + "MEturquoise":[ + 0.0646677768085351, + 0.137200224277058, + 0.63451113720732, + -0.544002665501479, + -0.489487590361863, + 0.197111117570427 + ] + }, + "soft_threshold":{ + "Power":[ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 12, + 14, + 16, + 18, + 20 + ], + "SFT.R.sq":[ + 0.0181093215847335, + 0.00011984142485113, + 0.000171967046945159, + 0.0105462010616537, + 0.0341444584348834, + 0.0687163726151286, + 0.0306423506274298, + 0.0492567394226327, + 0.0789539269997996, + 0.0944880158122527, + 0.122195040270446, + 0.0340768567186139, + 0.0625860126119281, + 0.100082257137014, + 0.128277841930818 + ], + "slope":[ + 3.81011386139005, + -0.170826531149538, + 0.159161787390082, + -1.01047981107833, + -1.55943531024794, + -1.93420125756514, + -1.16799247295814, + -1.33414063070555, + -1.54984944650438, + -1.54715757057087, + -1.49931213589121, + -1.80460140377151, + -2.19055343583319, + -2.52135805259606, + -2.58599487577447 + ], + "truncated.R.sq":[ + 0.768989700952805, + 0.522025793450566, + 0.329341226670409, + 0.110265719555879, + 0.0195649645183151, + -0.0503253079741786, + 0.0507498358330625, + -0.0129255450167765, + -0.035717519210676, + -0.0807094793662766, + -0.0967803564932559, + 0.00172686282662393, + -0.0340811003657508, + -0.0390284600100592, + -0.0489269837827069 + ], + "mean.k.":[ + 4.20178789454309, + 3.44556816856968, + 2.98532344074325, + 2.65297323828966, + 2.39517682414009, + 2.18846935370095, + 2.01963258852289, + 1.87999059876872, + 1.76335304166057, + 1.66510080962817, + 1.51038968360321, + 1.39583176924843, + 1.30882729664563, + 1.24120316437299, + 1.18753154238216 + ], + "median.k.":[ + 4.65733789933094, + 4.13585224131512, + 3.75980713552836, + 3.43775457512904, + 3.15305040369031, + 2.89933881967523, + 2.67225531456956, + 2.46825611568646, + 2.28437024155601, + 2.118086531192, + 1.83011067501282, + 1.59073325345641, + 1.38991168639473, + 1.2201000051609, + 1.07553194658444 + ], + "max.k.":[ + 4.81522245318686, + 4.21987143583501, + 3.83876962723542, + 3.55526976885104, + 3.32904938849614, + 3.14312441404036, + 2.98828051379132, + 2.85837136671219, + 2.74879840851137, + 2.65594228759455, + 2.50929962297015, + 2.40113981571731, + 2.31993775805391, + 2.25792900175825, + 2.2098218130451 + ] + }, + "blockGenes":[ + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "net_colors":{ + "X1":"turquoise", + "X2":"turquoise", + "X3":"turquoise", + "X4":"turquoise", + "X5":"turquoise", + "X6":"turquoise", + "X7":"turquoise" + }, + "net_unmerged":{ + "X1":"turquoise", + "X2":"turquoise", + "X3":"turquoise", + "X4":"turquoise", + "X5":"turquoise", + "X6":"turquoise", + "X7":"turquoise" + }, + "imageLoc":"/WGCNAoutput_1uujpTIpC.png" +} \ No newline at end of file -- cgit v1.2.3 From fc335fa1394a39d1f9f397cb69c755040ecfc5e1 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 14:37:48 +0300 Subject: append input to output --- scripts/wgcna_analysis.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index 86ddffb..ee749e9 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -20,19 +20,19 @@ if (length(args)==0) { json_file_path = args[1] } -results <- fromJSON(file = json_file_path) +inputData <- fromJSON(file = json_file_path) # parse the json data input -minModuleSize <-results$minModuleSize +minModuleSize <-inputData$minModuleSize -TOMtype <-results$TOMtype +TOMtype <-inputData$TOMtype -corType <-results$corType +corType <-inputData$corType # -trait_sample_data <- do.call(rbind, results$trait_sample_data) +trait_sample_data <- do.call(rbind, inputData$trait_sample_data) dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x)))) # transform expressionData @@ -102,12 +102,12 @@ addGuide = TRUE, guideHang = 0.05) -json_data <- list(ModEigens=network$MEs,soft_threshold=sft$fitIndices, +json_data <- list(input = inputData,output = list(ModEigens=network$MEs,soft_threshold=sft$fitIndices, blockGenes =network$blockGenes[[1]], net_colors =network$colors, net_unmerged=network$unmergedColors, - imageLoc=imageLoc) + imageLoc=imageLoc)) json_data <- toJSON(json_data) -write(json_data,"./output.json") \ No newline at end of file +write(json_data,file= json_file_path) \ No newline at end of file -- cgit v1.2.3 From 9c221e0d89603acd5412be95650a469824e2ab99 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 15:10:16 +0300 Subject: check for na powerEst and use a default value --- scripts/wgcna_analysis.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index ee749e9..c3b1ac8 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -62,13 +62,21 @@ enableWGCNAThreads() powers <- c(c(1:10), seq(from = 12, to=20, by=2)) sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) +# check the power estimate + +if (is.na(sft$powerEstimate)){ + powerEst = 1 +}else{ + powerEst = sft$powerEstimate +} + # pass user options network <- blockwiseModules(dataExpr, #similarity matrix options corType = corType, #adjacency matrix options - power = 5, + power = powerEst, networkType = "unsigned", #TOM options TOMtype = TOMtype, -- cgit v1.2.3 From 3062aee581560ae1928d8e6077366fc072646677 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 23 Sep 2021 21:27:42 +0300 Subject: add traits as columns names and pass as json input --- scripts/wgcna_analysis.R | 18 +++++++++++------- scripts/wgcna_test_data.json | 3 ++- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index c3b1ac8..e641652 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -53,6 +53,8 @@ dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] ## network constructions and modules +names(dataExpr) = inputData$trait_names + # Allow multi-threading within WGCNA enableWGCNAThreads() @@ -82,6 +84,7 @@ network <- blockwiseModules(dataExpr, TOMtype = TOMtype, #module indentification + verbose = 3, minmodulesSize = minModuleSize, deepSplit = 3, @@ -108,13 +111,14 @@ dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05) - - -json_data <- list(input = inputData,output = list(ModEigens=network$MEs,soft_threshold=sft$fitIndices, - blockGenes =network$blockGenes[[1]], - net_colors =network$colors, - net_unmerged=network$unmergedColors, - imageLoc=imageLoc)) +json_data <- list(input = inputData, + output = list(ModEigens=network$MEs, + soft_threshold=sft$fitIndices, + blockGenes =network$blockGenes[[1]], + net_colors =network$colors, + power_used_for_analy=powerEst, + net_unmerged=network$unmergedColors, + imageLoc=imageLoc)) json_data <- toJSON(json_data) diff --git a/scripts/wgcna_test_data.json b/scripts/wgcna_test_data.json index 7348b4e..1d469f4 100644 --- a/scripts/wgcna_test_data.json +++ b/scripts/wgcna_test_data.json @@ -1,4 +1,5 @@ { + "trait_names":["1455537_at","1425637_at","1449593_at","1421945_a_at","1450423_s_at","1423841_at","1451144_at"], "trait_sample_data":[ { "129S1/SvImJ": 7.142, @@ -61,4 +62,4 @@ "minModuleSize": 30, "corType": "pearson" -} \ No newline at end of file +} -- cgit v1.2.3 From 6f25b8e2b1d1a34c054d325b1c37b303529b8827 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 27 Sep 2021 15:15:28 +0300 Subject: remove unnecessary comments and variables --- scripts/wgcna_analysis.R | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'scripts') diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R index e641652..17b3537 100644 --- a/scripts/wgcna_analysis.R +++ b/scripts/wgcna_analysis.R @@ -23,24 +23,14 @@ if (length(args)==0) { inputData <- fromJSON(file = json_file_path) -# parse the json data input - -minModuleSize <-inputData$minModuleSize - -TOMtype <-inputData$TOMtype - -corType <-inputData$corType -# - trait_sample_data <- do.call(rbind, inputData$trait_sample_data) dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x)))) # transform expressionData dataExpr <- data.frame(t(dataExpr)) -gsg = goodSamplesGenes(dataExpr, verbose = 3); +gsg = goodSamplesGenes(dataExpr, verbose = 3) -# https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/ if (!gsg$allOK) { if (sum(!gsg$goodGenes)>0) @@ -75,18 +65,18 @@ if (is.na(sft$powerEstimate)){ # pass user options network <- blockwiseModules(dataExpr, #similarity matrix options - corType = corType, + corType = inputData$corType, #adjacency matrix options power = powerEst, networkType = "unsigned", #TOM options - TOMtype = TOMtype, + TOMtype = inputData$TOMtype, #module indentification verbose = 3, - minmodulesSize = minModuleSize, + minmodulesSize = inputData$minModuleSize, deepSplit = 3, PamRespectsDendro = FALSE ) -- cgit v1.2.3