aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorBonfaceKilz2021-09-28 11:21:07 +0300
committerGitHub2021-09-28 11:21:07 +0300
commit674dc56b9df38e7cf1bbc65a2fc6bf3cc16f7231 (patch)
treef35b34525843542aca50c4f22c6b62ca59d1a057 /scripts
parent0cbb6ecde0315b7d6f021cb17406f5e5197e8a05 (diff)
parent16235188d4ee2ad21a667832baf6cbbea6d8856a (diff)
downloadgenenetwork3-674dc56b9df38e7cf1bbc65a2fc6bf3cc16f7231.tar.gz
Merge pull request #38 from genenetwork/feature/wgcna_analysis
script for wgcna analysis
Diffstat (limited to 'scripts')
-rw-r--r--scripts/output.json161
-rw-r--r--scripts/wgcna_analysis.R115
-rw-r--r--scripts/wgcna_test_data.csv9
-rw-r--r--scripts/wgcna_test_data.json65
4 files changed, 350 insertions, 0 deletions
diff --git a/scripts/output.json b/scripts/output.json
new file mode 100644
index 0000000..9caf837
--- /dev/null
+++ b/scripts/output.json
@@ -0,0 +1,161 @@
+{
+ "ModEigens":{
+ "MEturquoise":[
+ 0.0646677768085351,
+ 0.137200224277058,
+ 0.63451113720732,
+ -0.544002665501479,
+ -0.489487590361863,
+ 0.197111117570427
+ ]
+ },
+ "soft_threshold":{
+ "Power":[
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 12,
+ 14,
+ 16,
+ 18,
+ 20
+ ],
+ "SFT.R.sq":[
+ 0.0181093215847335,
+ 0.00011984142485113,
+ 0.000171967046945159,
+ 0.0105462010616537,
+ 0.0341444584348834,
+ 0.0687163726151286,
+ 0.0306423506274298,
+ 0.0492567394226327,
+ 0.0789539269997996,
+ 0.0944880158122527,
+ 0.122195040270446,
+ 0.0340768567186139,
+ 0.0625860126119281,
+ 0.100082257137014,
+ 0.128277841930818
+ ],
+ "slope":[
+ 3.81011386139005,
+ -0.170826531149538,
+ 0.159161787390082,
+ -1.01047981107833,
+ -1.55943531024794,
+ -1.93420125756514,
+ -1.16799247295814,
+ -1.33414063070555,
+ -1.54984944650438,
+ -1.54715757057087,
+ -1.49931213589121,
+ -1.80460140377151,
+ -2.19055343583319,
+ -2.52135805259606,
+ -2.58599487577447
+ ],
+ "truncated.R.sq":[
+ 0.768989700952805,
+ 0.522025793450566,
+ 0.329341226670409,
+ 0.110265719555879,
+ 0.0195649645183151,
+ -0.0503253079741786,
+ 0.0507498358330625,
+ -0.0129255450167765,
+ -0.035717519210676,
+ -0.0807094793662766,
+ -0.0967803564932559,
+ 0.00172686282662393,
+ -0.0340811003657508,
+ -0.0390284600100592,
+ -0.0489269837827069
+ ],
+ "mean.k.":[
+ 4.20178789454309,
+ 3.44556816856968,
+ 2.98532344074325,
+ 2.65297323828966,
+ 2.39517682414009,
+ 2.18846935370095,
+ 2.01963258852289,
+ 1.87999059876872,
+ 1.76335304166057,
+ 1.66510080962817,
+ 1.51038968360321,
+ 1.39583176924843,
+ 1.30882729664563,
+ 1.24120316437299,
+ 1.18753154238216
+ ],
+ "median.k.":[
+ 4.65733789933094,
+ 4.13585224131512,
+ 3.75980713552836,
+ 3.43775457512904,
+ 3.15305040369031,
+ 2.89933881967523,
+ 2.67225531456956,
+ 2.46825611568646,
+ 2.28437024155601,
+ 2.118086531192,
+ 1.83011067501282,
+ 1.59073325345641,
+ 1.38991168639473,
+ 1.2201000051609,
+ 1.07553194658444
+ ],
+ "max.k.":[
+ 4.81522245318686,
+ 4.21987143583501,
+ 3.83876962723542,
+ 3.55526976885104,
+ 3.32904938849614,
+ 3.14312441404036,
+ 2.98828051379132,
+ 2.85837136671219,
+ 2.74879840851137,
+ 2.65594228759455,
+ 2.50929962297015,
+ 2.40113981571731,
+ 2.31993775805391,
+ 2.25792900175825,
+ 2.2098218130451
+ ]
+ },
+ "blockGenes":[
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "net_colors":{
+ "X1":"turquoise",
+ "X2":"turquoise",
+ "X3":"turquoise",
+ "X4":"turquoise",
+ "X5":"turquoise",
+ "X6":"turquoise",
+ "X7":"turquoise"
+ },
+ "net_unmerged":{
+ "X1":"turquoise",
+ "X2":"turquoise",
+ "X3":"turquoise",
+ "X4":"turquoise",
+ "X5":"turquoise",
+ "X6":"turquoise",
+ "X7":"turquoise"
+ },
+ "imageLoc":"/WGCNAoutput_1uujpTIpC.png"
+} \ No newline at end of file
diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R
new file mode 100644
index 0000000..17b3537
--- /dev/null
+++ b/scripts/wgcna_analysis.R
@@ -0,0 +1,115 @@
+
+
+library(WGCNA);
+library(stringi);
+library(rjson)
+
+options(stringsAsFactors = FALSE);
+
+imgDir = Sys.getenv("GENERATED_IMAGE_DIR")
+# load expression data **assumes from json files row(traits)(columns info+samples)
+# pass the file_path as arg
+# pass the file path to read json data
+
+args = commandArgs(trailingOnly=TRUE)
+
+if (length(args)==0) {
+ stop("Argument for the file location is required", call.=FALSE)
+} else {
+ # default output file
+ json_file_path = args[1]
+}
+
+inputData <- fromJSON(file = json_file_path)
+
+
+trait_sample_data <- do.call(rbind, inputData$trait_sample_data)
+
+dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x))))
+# transform expressionData
+
+dataExpr <- data.frame(t(dataExpr))
+gsg = goodSamplesGenes(dataExpr, verbose = 3)
+
+if (!gsg$allOK)
+{
+if (sum(!gsg$goodGenes)>0)
+printFlush(paste("Removing genes:", paste(names(dataExpr)[!gsg$goodGenes], collapse = ", ")));
+if (sum(!gsg$goodSamples)>0)
+printFlush(paste("Removing samples:", paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ", ")));
+# Remove the offending genes and samples from the data:
+dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes]
+}
+
+## network constructions and modules
+
+names(dataExpr) = inputData$trait_names
+
+# Allow multi-threading within WGCNA
+enableWGCNAThreads()
+
+# choose softthreshhold (Calculate soft threshold)
+# xtodo allow users to pass args
+
+powers <- c(c(1:10), seq(from = 12, to=20, by=2))
+sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5)
+
+# check the power estimate
+
+if (is.na(sft$powerEstimate)){
+ powerEst = 1
+}else{
+ powerEst = sft$powerEstimate
+}
+
+# pass user options
+network <- blockwiseModules(dataExpr,
+ #similarity matrix options
+ corType = inputData$corType,
+ #adjacency matrix options
+
+ power = powerEst,
+ networkType = "unsigned",
+ #TOM options
+ TOMtype = inputData$TOMtype,
+
+ #module indentification
+ verbose = 3,
+
+ minmodulesSize = inputData$minModuleSize,
+ deepSplit = 3,
+ PamRespectsDendro = FALSE
+ )
+
+
+
+genImageRandStr <- function(prefix){
+
+ randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+
+ return(paste(randStr,".png",sep=""))
+}
+
+mergedColors <- labels2colors(network$colors)
+
+imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput"))
+png(imageLoc,width=1000,height=600,type='cairo-png')
+
+plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]],
+"Module colors",
+dendroLabels = FALSE, hang = 0.03,
+addGuide = TRUE, guideHang = 0.05)
+
+
+json_data <- list(input = inputData,
+ output = list(ModEigens=network$MEs,
+ soft_threshold=sft$fitIndices,
+ blockGenes =network$blockGenes[[1]],
+ net_colors =network$colors,
+ power_used_for_analy=powerEst,
+ net_unmerged=network$unmergedColors,
+ imageLoc=imageLoc))
+
+json_data <- toJSON(json_data)
+
+write(json_data,file= json_file_path) \ No newline at end of file
diff --git a/scripts/wgcna_test_data.csv b/scripts/wgcna_test_data.csv
new file mode 100644
index 0000000..8db9598
--- /dev/null
+++ b/scripts/wgcna_test_data.csv
@@ -0,0 +1,9 @@
+129S1/SvImJ,A/J,AKR/J,B6D2F1,BALB/cByJ,BALB/cJ
+7.142,7.31,7.49,6.899,7.172,7.396
+7.071,7.05,7.313,6.999,7.293,7.117
+7.221,7.246,7.754,6.866,6.752,7.269
+9.221,9.246,9.954,6.866,6.952,9.269
+7.221,7.246,7.754,6.866,6.752,7.269
+7.221,7.246,7.754,6.866,6.752,7.269
+11.221,11.246,11.1154,6.866,6.1152,11.269
+
diff --git a/scripts/wgcna_test_data.json b/scripts/wgcna_test_data.json
new file mode 100644
index 0000000..1d469f4
--- /dev/null
+++ b/scripts/wgcna_test_data.json
@@ -0,0 +1,65 @@
+{
+ "trait_names":["1455537_at","1425637_at","1449593_at","1421945_a_at","1450423_s_at","1423841_at","1451144_at"],
+ "trait_sample_data":[
+ {
+ "129S1/SvImJ": 7.142,
+ "A/J": 7.31,
+ "AKR/J": 7.49,
+ "B6D2F1": 6.899,
+ "BALB/cByJ": 7.172,
+ "BALB/cJ": 7.396
+ },
+ {
+ "129S1/SvImJ": 7.071,
+ "A/J": 7.05,
+ "AKR/J": 7.313,
+ "B6D2F1": 6.999,
+ "BALB/cByJ": 7.293,
+ "BALB/cJ": 7.117
+ },
+ {
+ "129S1/SvImJ": 7.221,
+ "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269
+ },
+ {
+ "129S1/SvImJ": 9.221,
+ "A/J": 9.246,
+ "AKR/J": 9.954,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.952,
+ "BALB/cJ": 9.269
+ },
+ {
+ "129S1/SvImJ": 7.221,
+ "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269
+ },
+ {
+ "129S1/SvImJ": 7.221,
+ "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269
+ },
+ {
+ "129S1/SvImJ": 11.221,
+ "A/J": 11.246,
+ "AKR/J": 11.1154,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.1152,
+ "BALB/cJ": 11.269
+ }
+ ],
+ "TOMtype": "unsigned",
+ "minModuleSize": 30,
+ "corType": "pearson"
+
+}