aboutsummaryrefslogtreecommitdiff
path: root/scripts/wgcna_analysis.R
blob: a5cbbe36c5abad8ffef98bac5659f4c4ec51fea5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
library(WGCNA);
library(stringi);
library(rjson)

options(stringsAsFactors = FALSE);

cat("Running the wgcna analysis script\n")

# load expression data **assumes from json files row(traits)(columns info+samples)
# pass the file_path as arg
# pass the file path to read json data




args = commandArgs(trailingOnly=TRUE)

if (length(args)==0) {
  stop("Argument for the file location is required", call.=FALSE)
} else {
  # default output file
  json_file_path  = args[1]
}

inputData <- fromJSON(file = json_file_path)
imgDir = inputData$TMPDIR


trait_sample_data <- do.call(rbind, inputData$trait_sample_data)

dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x))))
# transform expressionData

dataExpr <- data.frame(t(dataExpr))
gsg = goodSamplesGenes(dataExpr, verbose = 3)

if (!gsg$allOK)
{
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(dataExpr)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes]
}

## network constructions and modules

names(dataExpr) = inputData$trait_names

# Allow multi-threading within WGCNA
enableWGCNAThreads()

# choose softthreshhold (Calculate soft threshold)
# xtodo allow users to pass args

powers <- c(c(1:10), seq(from = 12, to=20, by=2)) 
sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5)

# check the power estimate

if (is.na(sft$powerEstimate)){
  powerEst = 1
}else{
  powerEst = sft$powerEstimate
}

# pass user options
network <- blockwiseModules(dataExpr,
                  #similarity  matrix options
                  corType = inputData$corType,
                  #adjacency  matrix options

                  power = powerEst,
                  networkType = "unsigned",
                  #TOM options
                  TOMtype =  inputData$TOMtype,

                  #module indentification
                  verbose = 3,

                  minmodulesSize = inputData$minModuleSize,
                  deepSplit = 3,
                  PamRespectsDendro = FALSE
                )



cat("Generated network \n")

network


genImageRandStr <- function(prefix){

	randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")

	return(paste(randStr,".png",sep=""))
}

mergedColors <- labels2colors(network$colors)

imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput"))
png(imageLoc,width=1000,height=600,type='cairo-png')


cat("Generating the CLuster  dendrogram\n")


plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)


json_data <- list(input = inputData,
  output = list(ModEigens=network$MEs,
   soft_threshold=sft$fitIndices,
   blockGenes =network$blockGenes[[1]],
   net_colors =network$colors,
   power_used_for_analy=powerEst,
   net_unmerged=network$unmergedColors,
   imageLoc=imageLoc))

json_data <- toJSON(json_data)

write(json_data,file= json_file_path)