diff options
author | BonfaceKilz | 2021-09-28 11:21:07 +0300 |
---|---|---|
committer | GitHub | 2021-09-28 11:21:07 +0300 |
commit | 674dc56b9df38e7cf1bbc65a2fc6bf3cc16f7231 (patch) | |
tree | f35b34525843542aca50c4f22c6b62ca59d1a057 /scripts | |
parent | 0cbb6ecde0315b7d6f021cb17406f5e5197e8a05 (diff) | |
parent | 16235188d4ee2ad21a667832baf6cbbea6d8856a (diff) | |
download | genenetwork3-674dc56b9df38e7cf1bbc65a2fc6bf3cc16f7231.tar.gz |
Merge pull request #38 from genenetwork/feature/wgcna_analysis
script for wgcna analysis
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/output.json | 161 | ||||
-rw-r--r-- | scripts/wgcna_analysis.R | 115 | ||||
-rw-r--r-- | scripts/wgcna_test_data.csv | 9 | ||||
-rw-r--r-- | scripts/wgcna_test_data.json | 65 |
4 files changed, 350 insertions, 0 deletions
diff --git a/scripts/output.json b/scripts/output.json new file mode 100644 index 0000000..9caf837 --- /dev/null +++ b/scripts/output.json @@ -0,0 +1,161 @@ +{ + "ModEigens":{ + "MEturquoise":[ + 0.0646677768085351, + 0.137200224277058, + 0.63451113720732, + -0.544002665501479, + -0.489487590361863, + 0.197111117570427 + ] + }, + "soft_threshold":{ + "Power":[ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 12, + 14, + 16, + 18, + 20 + ], + "SFT.R.sq":[ + 0.0181093215847335, + 0.00011984142485113, + 0.000171967046945159, + 0.0105462010616537, + 0.0341444584348834, + 0.0687163726151286, + 0.0306423506274298, + 0.0492567394226327, + 0.0789539269997996, + 0.0944880158122527, + 0.122195040270446, + 0.0340768567186139, + 0.0625860126119281, + 0.100082257137014, + 0.128277841930818 + ], + "slope":[ + 3.81011386139005, + -0.170826531149538, + 0.159161787390082, + -1.01047981107833, + -1.55943531024794, + -1.93420125756514, + -1.16799247295814, + -1.33414063070555, + -1.54984944650438, + -1.54715757057087, + -1.49931213589121, + -1.80460140377151, + -2.19055343583319, + -2.52135805259606, + -2.58599487577447 + ], + "truncated.R.sq":[ + 0.768989700952805, + 0.522025793450566, + 0.329341226670409, + 0.110265719555879, + 0.0195649645183151, + -0.0503253079741786, + 0.0507498358330625, + -0.0129255450167765, + -0.035717519210676, + -0.0807094793662766, + -0.0967803564932559, + 0.00172686282662393, + -0.0340811003657508, + -0.0390284600100592, + -0.0489269837827069 + ], + "mean.k.":[ + 4.20178789454309, + 3.44556816856968, + 2.98532344074325, + 2.65297323828966, + 2.39517682414009, + 2.18846935370095, + 2.01963258852289, + 1.87999059876872, + 1.76335304166057, + 1.66510080962817, + 1.51038968360321, + 1.39583176924843, + 1.30882729664563, + 1.24120316437299, + 1.18753154238216 + ], + "median.k.":[ + 4.65733789933094, + 4.13585224131512, + 3.75980713552836, + 3.43775457512904, + 3.15305040369031, + 2.89933881967523, + 2.67225531456956, + 2.46825611568646, + 2.28437024155601, + 2.118086531192, + 1.83011067501282, + 1.59073325345641, + 1.38991168639473, + 1.2201000051609, + 1.07553194658444 + ], + "max.k.":[ + 4.81522245318686, + 4.21987143583501, + 3.83876962723542, + 3.55526976885104, + 3.32904938849614, + 3.14312441404036, + 2.98828051379132, + 2.85837136671219, + 2.74879840851137, + 2.65594228759455, + 2.50929962297015, + 2.40113981571731, + 2.31993775805391, + 2.25792900175825, + 2.2098218130451 + ] + }, + "blockGenes":[ + 1, + 2, + 3, + 4, + 5, + 6, + 7 + ], + "net_colors":{ + "X1":"turquoise", + "X2":"turquoise", + "X3":"turquoise", + "X4":"turquoise", + "X5":"turquoise", + "X6":"turquoise", + "X7":"turquoise" + }, + "net_unmerged":{ + "X1":"turquoise", + "X2":"turquoise", + "X3":"turquoise", + "X4":"turquoise", + "X5":"turquoise", + "X6":"turquoise", + "X7":"turquoise" + }, + "imageLoc":"/WGCNAoutput_1uujpTIpC.png" +}
\ No newline at end of file diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R new file mode 100644 index 0000000..17b3537 --- /dev/null +++ b/scripts/wgcna_analysis.R @@ -0,0 +1,115 @@ + + +library(WGCNA); +library(stringi); +library(rjson) + +options(stringsAsFactors = FALSE); + +imgDir = Sys.getenv("GENERATED_IMAGE_DIR") +# load expression data **assumes from json files row(traits)(columns info+samples) +# pass the file_path as arg +# pass the file path to read json data + +args = commandArgs(trailingOnly=TRUE) + +if (length(args)==0) { + stop("Argument for the file location is required", call.=FALSE) +} else { + # default output file + json_file_path = args[1] +} + +inputData <- fromJSON(file = json_file_path) + + +trait_sample_data <- do.call(rbind, inputData$trait_sample_data) + +dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x)))) +# transform expressionData + +dataExpr <- data.frame(t(dataExpr)) +gsg = goodSamplesGenes(dataExpr, verbose = 3) + +if (!gsg$allOK) +{ +if (sum(!gsg$goodGenes)>0) +printFlush(paste("Removing genes:", paste(names(dataExpr)[!gsg$goodGenes], collapse = ", "))); +if (sum(!gsg$goodSamples)>0) +printFlush(paste("Removing samples:", paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ", "))); +# Remove the offending genes and samples from the data: +dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes] +} + +## network constructions and modules + +names(dataExpr) = inputData$trait_names + +# Allow multi-threading within WGCNA +enableWGCNAThreads() + +# choose softthreshhold (Calculate soft threshold) +# xtodo allow users to pass args + +powers <- c(c(1:10), seq(from = 12, to=20, by=2)) +sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5) + +# check the power estimate + +if (is.na(sft$powerEstimate)){ + powerEst = 1 +}else{ + powerEst = sft$powerEstimate +} + +# pass user options +network <- blockwiseModules(dataExpr, + #similarity matrix options + corType = inputData$corType, + #adjacency matrix options + + power = powerEst, + networkType = "unsigned", + #TOM options + TOMtype = inputData$TOMtype, + + #module indentification + verbose = 3, + + minmodulesSize = inputData$minModuleSize, + deepSplit = 3, + PamRespectsDendro = FALSE + ) + + + +genImageRandStr <- function(prefix){ + + randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_") + + return(paste(randStr,".png",sep="")) +} + +mergedColors <- labels2colors(network$colors) + +imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput")) +png(imageLoc,width=1000,height=600,type='cairo-png') + +plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]], +"Module colors", +dendroLabels = FALSE, hang = 0.03, +addGuide = TRUE, guideHang = 0.05) + + +json_data <- list(input = inputData, + output = list(ModEigens=network$MEs, + soft_threshold=sft$fitIndices, + blockGenes =network$blockGenes[[1]], + net_colors =network$colors, + power_used_for_analy=powerEst, + net_unmerged=network$unmergedColors, + imageLoc=imageLoc)) + +json_data <- toJSON(json_data) + +write(json_data,file= json_file_path)
\ No newline at end of file diff --git a/scripts/wgcna_test_data.csv b/scripts/wgcna_test_data.csv new file mode 100644 index 0000000..8db9598 --- /dev/null +++ b/scripts/wgcna_test_data.csv @@ -0,0 +1,9 @@ +129S1/SvImJ,A/J,AKR/J,B6D2F1,BALB/cByJ,BALB/cJ
+7.142,7.31,7.49,6.899,7.172,7.396
+7.071,7.05,7.313,6.999,7.293,7.117
+7.221,7.246,7.754,6.866,6.752,7.269
+9.221,9.246,9.954,6.866,6.952,9.269
+7.221,7.246,7.754,6.866,6.752,7.269
+7.221,7.246,7.754,6.866,6.752,7.269
+11.221,11.246,11.1154,6.866,6.1152,11.269
+
diff --git a/scripts/wgcna_test_data.json b/scripts/wgcna_test_data.json new file mode 100644 index 0000000..1d469f4 --- /dev/null +++ b/scripts/wgcna_test_data.json @@ -0,0 +1,65 @@ +{ + "trait_names":["1455537_at","1425637_at","1449593_at","1421945_a_at","1450423_s_at","1423841_at","1451144_at"], + "trait_sample_data":[ + { + "129S1/SvImJ": 7.142, + "A/J": 7.31, + "AKR/J": 7.49, + "B6D2F1": 6.899, + "BALB/cByJ": 7.172, + "BALB/cJ": 7.396 + }, + { + "129S1/SvImJ": 7.071, + "A/J": 7.05, + "AKR/J": 7.313, + "B6D2F1": 6.999, + "BALB/cByJ": 7.293, + "BALB/cJ": 7.117 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 9.221, + "A/J": 9.246, + "AKR/J": 9.954, + "B6D2F1": 6.866, + "BALB/cByJ": 6.952, + "BALB/cJ": 9.269 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 7.221, + "A/J": 7.246, + "AKR/J": 7.754, + "B6D2F1": 6.866, + "BALB/cByJ": 6.752, + "BALB/cJ": 7.269 + }, + { + "129S1/SvImJ": 11.221, + "A/J": 11.246, + "AKR/J": 11.1154, + "B6D2F1": 6.866, + "BALB/cByJ": 6.1152, + "BALB/cJ": 11.269 + } + ], + "TOMtype": "unsigned", + "minModuleSize": 30, + "corType": "pearson" + +} |