+
WGCNA analysis parameters
Phenotypes used as input: {{request.form}}
--
cgit v1.2.3
From b47cd2fca0c9a835b15288aa4959867eaeb00f5a Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Sun, 20 Sep 2015 12:12:04 +0200
Subject: Transfering data from python to R for WGCNA analysis using Rpy2
---
wqflask/wqflask/wgcna/wgcna_analysis.py | 50 ++++++++++++++++++++++++++-------
1 file changed, 40 insertions(+), 10 deletions(-)
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index 84cdc241..5316fcd4 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -1,9 +1,10 @@
# WGCNA analysis for GN2
# Author / Maintainer: Danny Arends
-
+import sys
from numpy import *
import scipy as sp # SciPy
import rpy2.robjects as ro # R Objects
+import rpy2.rinterface as ri
from utility import helper_functions
@@ -16,6 +17,13 @@ r_options = ro.r["options"] # Map the options function
r_read_csv = ro.r["read.csv"] # Map the read.csv function
r_dim = ro.r["dim"] # Map the dim function
r_c = ro.r["c"] # Map the c function
+r_cat = ro.r["cat"] # Map the cat function
+r_paste = ro.r["paste"] # Map the paste function
+r_unlist = ro.r["unlist"] # Map the unlist function
+r_unique = ro.r["unique"] # Map the unique function
+r_length = ro.r["length"] # Map the length function
+r_matrix = ro.r["matrix"] # Map the matrix function
+r_list = ro.r["list"] # Map the list function
r_seq = ro.r["seq"] # Map the seq function
r_table = ro.r["table"] # Map the table function
r_names = ro.r["names"] # Map the names function
@@ -46,25 +54,47 @@ class WGCNA(object):
self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
print("Retrieved phenotype data from database", requestform['trait_list'])
helper_functions.get_trait_db_obs(self, self.trait_db_list)
- self.results = {}
- for t in self.trait_list:
- strains = []
- for s in t[0].data:
- strains.append(s)
- self.results[t] = strains
- print("Retrieved phenotype data from database")
+ self.input = {} # self.input contains the phenotype values we need to send to R
+ strains = [] # All the strains we have data for (contains duplicates)
+ traits = [] # All the traits we have data for (should not contain duplicates)
+ for trait in self.trait_list:
+ traits.append(trait[0].name)
+ self.input[trait[0].name] = {}
+ for strain in trait[0].data:
+ strains.append(strain)
+ self.input[trait[0].name][strain] = trait[0].data[strain].value
+
+ # Transfer the load data from python to R
+ uStrainsR = r_unique(ro.Vector(strains)) # Unique strains in R vector
+ uTraitsR = r_unique(ro.Vector(traits)) # Unique traits in R vector
+
+ r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
+ r_cat("The number of unique traits:", r_length(uTraitsR), "\n")
+
+ # rM is the datamatrix holding all the data in R /rows = strains columns = traits
+ rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR))
+ for t in uTraitsR:
+ trait = t[0] # R uses vectors every single element is a vector
+ for s in uStrainsR:
+ strain = s[0] # R uses vectors every single element is a vector
+ rM.rx[strain, trait] = self.input[trait].get(strain) # Update the matrix location
+ print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
+ sys.stdout.flush()
+
# TODO:
- # Load data into R
+ self.results = {}
# Calculate a good soft threshold
# Create block wise modules using WGCNA
# How many modules and how many gene per module ?
# Show the iconic WCGNA plot of the modules in the hanging tree
+ sys.stdout.flush()
def process_results(self, results):
print("Processing WGCNA output")
template_vars = {}
- template_vars["result"] = self.results
+ template_vars["input"] = self.input
+ template_vars["results"] = self.results
#r_sink(type = "message") # This restores R output to the stdout/stderr
#r_sink() # We should end the Rpy session more or less
return(dict(template_vars))
--
cgit v1.2.3
From c3787a404f64b1d77387d5156be1ad3271a08dee Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Sun, 20 Sep 2015 12:51:08 +0200
Subject: Display the results from WGCNA updated
---
wqflask/wqflask/templates/wgcna_results.html | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/templates/wgcna_results.html b/wqflask/wqflask/templates/wgcna_results.html
index 79c321e5..9592c25f 100644
--- a/wqflask/wqflask/templates/wgcna_results.html
+++ b/wqflask/wqflask/templates/wgcna_results.html
@@ -7,12 +7,17 @@
Phenotype / Module table:
Phenotype | Module | Weight |
- {% for k in result %}
- {{k[0].name}} | | |
+ {% for k in results['network'] %}
+ {{k}} | | |
{% endfor %}
- WGCNA module plot
-
+ WGCNA module plot
+
+
{% endblock %}
--
cgit v1.2.3
From 30f314c1572aeea2576c54c5c4c8f304932a5303 Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Sun, 20 Sep 2015 12:52:16 +0200
Subject: Now able to run WGCNA analysis on a user defined collection, still
lots of todos
---
wqflask/wqflask/wgcna/wgcna_analysis.py | 37 +++++++++++++++++++++++++++++----
1 file changed, 33 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index 5316fcd4..74bb6f8e 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -6,6 +6,11 @@ import scipy as sp # SciPy
import rpy2.robjects as ro # R Objects
import rpy2.rinterface as ri
+from base import webqtlConfig # For paths and stuff
+
+import base64
+import array
+
from utility import helper_functions
from rpy2.robjects.packages import importr
@@ -22,8 +27,8 @@ r_paste = ro.r["paste"] # Map the paste function
r_unlist = ro.r["unlist"] # Map the unlist function
r_unique = ro.r["unique"] # Map the unique function
r_length = ro.r["length"] # Map the length function
-r_matrix = ro.r["matrix"] # Map the matrix function
-r_list = ro.r["list"] # Map the list function
+r_list = ro.r.list # Map the list function
+r_matrix = ro.r.matrix # Map the matrix function
r_seq = ro.r["seq"] # Map the seq function
r_table = ro.r["table"] # Map the table function
r_names = ro.r["names"] # Map the names function
@@ -82,19 +87,43 @@ class WGCNA(object):
print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
sys.stdout.flush()
- # TODO:
+ # TODO: Get the user specified parameters
+
self.results = {}
# Calculate a good soft threshold
+ powers = r_c(r_c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), r_seq(12, 20, 2))
+ sft = self.r_pickSoftThreshold(rM, powerVector = powers, verbose = 5)
+
# Create block wise modules using WGCNA
+ network = self.r_blockwiseModules(rM, power = 6, verbose = 3)
+
+ self.results['network'] = network
+
# How many modules and how many gene per module ?
- # Show the iconic WCGNA plot of the modules in the hanging tree
+ print(r_table(network[1]))
+
+ # The iconic WCGNA plot of the modules in the hanging tree
+ self.results['imgurl'] = webqtlConfig.TMPDIR + "WGCNAoutput.png"
+ r_png(self.results['imgurl'])
+ mergedColors = self.r_labels2colors(network[1])
+ self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors",dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05)
+ r_dev_off()
sys.stdout.flush()
+ def render_image(self, results):
+ print("pre-loading imgage results:", self.results['imgurl'])
+ imgfile = open(self.results['imgurl'], 'rb')
+ imgdata = imgfile.read()
+ imgB64 = imgdata.encode("base64")
+ bytesarray = array.array('B', imgB64)
+ self.results['imgdata'] = bytesarray
+
def process_results(self, results):
print("Processing WGCNA output")
template_vars = {}
template_vars["input"] = self.input
template_vars["results"] = self.results
+ self.render_image(results)
#r_sink(type = "message") # This restores R output to the stdout/stderr
#r_sink() # We should end the Rpy session more or less
return(dict(template_vars))
--
cgit v1.2.3
From 8a5dbce3273ba296a342054e2b2c23866006b5b4 Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Sun, 20 Sep 2015 13:05:37 +0200
Subject: Using the webqtlConfig.TMPDIR constant and the webqtlUtil.genRandStr
function
---
wqflask/wqflask/templates/wgcna_results.html | 2 +-
wqflask/wqflask/views.py | 2 +-
wqflask/wqflask/wgcna/wgcna_analysis.py | 14 +++++++++-----
3 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/wqflask/wqflask/templates/wgcna_results.html b/wqflask/wqflask/templates/wgcna_results.html
index 9592c25f..b8a4a4f5 100644
--- a/wqflask/wqflask/templates/wgcna_results.html
+++ b/wqflask/wqflask/templates/wgcna_results.html
@@ -12,7 +12,7 @@
{% endfor %}
WGCNA module plot
-
+
module table
---
wqflask/wqflask/templates/wgcna_results.html | 32 ++++++++++++++++++++++------
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/templates/wgcna_results.html b/wqflask/wqflask/templates/wgcna_results.html
index b8a4a4f5..077238a5 100644
--- a/wqflask/wqflask/templates/wgcna_results.html
+++ b/wqflask/wqflask/templates/wgcna_results.html
@@ -4,13 +4,33 @@
{% block content %}
WGCNA Results
- Phenotype / Module table:
-
- Phenotype | Module | Weight |
- {% for k in results['network'] %}
- {{k}} | | |
- {% endfor %}
+ Soft threshold table
+
+
+ Phenotype / Module table
+
+ debug: {{results['network']|length}}
+ debug: {{results['network'][0]|length}}
+ {{phenotypes[0][0]}}
+
+ Phenotype | Module |
+ {% for r in range(results['network'][0]|length) %}
+
+ {{phenotypes[r][0]}} |
+ {{results['network'][0][r]}} |
+
+ {% endfor %}
+
+
WGCNA module plot
WGCNA Results
- Soft threshold table
+ Analysis found {{results['nmod']}} modules when scanning {{results['nphe']}} phenotypes, measured on {{results['nstr']}} strains.
+ Additional parameters settings:
+
+ - Soft thresholds checked = {{results['requestform']['SoftThresholds']}}
+ - Power used for this analysis = {{results['Power']}}
+ - TomType = {{results['requestform']['TOMtype']}}
+ - Minimum module size = {{results['requestform']['MinModuleSize'] }}
+ - mergeCutHeight = {{results['requestform']['mergeCutHeight'] }}
+
+
+ Soft threshold table
+ WGCNA module plot
+
+
- Phenotype / Module table
- debug: {{results['network']|length}}
- debug: {{results['network'][0]|length}}
- {{phenotypes[0][0]}}
+ Phenotype / Module table
Phenotype | Module |
- {% for r in range(results['network'][0]|length) %}
+ {% for r in range(results['nphe']) %}
- {{phenotypes[r][0]}} |
+ {{results['phenotypes'][r][0]}} |
{{results['network'][0][r]}} |
{% endfor %}
- WGCNA module plot
-
-
+ Module eigen genes
+
{% endblock %}
diff --git a/wqflask/wqflask/templates/wgcna_setup.html b/wqflask/wqflask/templates/wgcna_setup.html
index 821e4954..49181938 100644
--- a/wqflask/wqflask/templates/wgcna_setup.html
+++ b/wqflask/wqflask/templates/wgcna_setup.html
@@ -6,17 +6,22 @@
WGCNA analysis parameters
- Phenotypes used as input: {{request.form}}
-
+ {% if request.form['trait_list'].split(",")|length <= 4 %}
+
+
ERROR: Too few phenotypes as input
+ Please make sure you select enough phenotypes / genes to perform WGCNA,
+ your collection needs to contain at least 4 different phenotypes. You provided {{request.form['trait_list'].split(',')|length}} phenotypes as input
+ {% else %}
+
+ {% endif %}
{% endblock %}
--
cgit v1.2.3
From 03c8f0c27b55a2aca93b925ac92cd3650ea6131a Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Thu, 8 Oct 2015 10:51:06 +0200
Subject: User inputs are now passed to the algorithm, and power is tested, and
autoselected. We need to discuss which parameters we want to expose to the
user.
---
wqflask/wqflask/wgcna/wgcna_analysis.py | 38 +++++++++++++++++++++++++--------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index 0cf4eeaf..b5e01ece 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -28,12 +28,14 @@ r_paste = ro.r["paste"] # Map the paste function
r_unlist = ro.r["unlist"] # Map the unlist function
r_unique = ro.r["unique"] # Map the unique function
r_length = ro.r["length"] # Map the length function
+r_unlist = ro.r["unlist"] # Map the unlist function
r_list = ro.r.list # Map the list function
r_matrix = ro.r.matrix # Map the matrix function
r_seq = ro.r["seq"] # Map the seq function
r_table = ro.r["table"] # Map the table function
r_names = ro.r["names"] # Map the names function
r_sink = ro.r["sink"] # Map the sink function
+r_is_NA = ro.r["is.na"] # Map the is.na function
r_file = ro.r["file"] # Map the file function
r_png = ro.r["png"] # Map the png function for plotting
r_dev_off = ro.r["dev.off"] # Map the dev.off function
@@ -74,7 +76,6 @@ class WGCNA(object):
# Transfer the load data from python to R
uStrainsR = r_unique(ro.Vector(strains)) # Unique strains in R vector
uTraitsR = r_unique(ro.Vector(traits)) # Unique traits in R vector
- self.phenotypes = uTraitsR
r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
r_cat("The number of unique traits:", r_length(uTraitsR), "\n")
@@ -86,29 +87,49 @@ class WGCNA(object):
for s in uStrainsR:
strain = s[0] # R uses vectors every single element is a vector
rM.rx[strain, trait] = self.input[trait].get(strain) # Update the matrix location
- print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
+ #print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
sys.stdout.flush()
# TODO: Get the user specified parameters
self.results = {}
- # Calculate a good soft threshold
- powers = r_c(r_c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), r_seq(12, 20, 2))
- self.sft = self.r_pickSoftThreshold(rM, powerVector = powers, verbose = 5)
+ self.results['nphe'] = r_length(uTraitsR)[0]
+ self.results['nstr'] = r_length(uStrainsR)[0]
+ self.results['phenotypes'] = uTraitsR
+ self.results['strains'] = uStrainsR
+ self.results['requestform'] = requestform
+
+ # Calculate soft threshold if the user specified the SoftThreshold variable
+ if requestform.get('SoftThresholds') is not None:
+ powers = [int(threshold.strip()) for threshold in requestform['SoftThresholds'].rstrip().split(",")]
+ rpow = r_unlist(r_c(powers))
+ print "SoftThresholds: {} == {}".format(powers, rpow)
+ self.sft = self.r_pickSoftThreshold(rM, powerVector = rpow, verbose = 5)
+
+ print "PowerEstimate: {}".format(self.sft[0])
+ self.results['PowerEstimate'] = self.sft[0]
+ if r_is_NA(self.sft[0]):
+ self.results['Power'] = 1
+ else:
+ self.results['Power'] = self.sft[0][0]
+ else:
+ # The user clicked a button, so no soft threshold selection, just use the value the user gives
+ self.results['Power'] = requestform.get('Power')
# Create block wise modules using WGCNA
- network = self.r_blockwiseModules(rM, power = 6, verbose = 3)
+ network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3)
# Save the network for the GUI
self.results['network'] = network
# How many modules and how many gene per module ?
- print(r_table(network[1]))
+ print "WGCNA found {} modules".format(r_table(network[1]))
+ self.results['nmod'] = r_length(r_table(network[1]))[0]
# The iconic WCGNA plot of the modules in the hanging tree
self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl']
- r_png(self.results['imgloc'])
+ r_png(self.results['imgloc'], width=1000, height=600)
mergedColors = self.r_labels2colors(network[1])
self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05)
r_dev_off()
@@ -126,7 +147,6 @@ class WGCNA(object):
print("Processing WGCNA output")
template_vars = {}
template_vars["input"] = self.input
- template_vars["phenotypes"] = self.phenotypes
template_vars["powers"] = self.sft[1:] # Results from the soft threshold analysis
template_vars["results"] = self.results
self.render_image(results)
--
cgit v1.2.3
From b4370bebdb1f33184ffa22038cbc5d877edde5c6 Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Thu, 8 Oct 2015 10:55:11 +0200
Subject: Adding more comments
---
wqflask/wqflask/wgcna/wgcna_analysis.py | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index b5e01ece..1174ce47 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -87,17 +87,15 @@ class WGCNA(object):
for s in uStrainsR:
strain = s[0] # R uses vectors every single element is a vector
rM.rx[strain, trait] = self.input[trait].get(strain) # Update the matrix location
- #print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
+ #DEBUG: print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
sys.stdout.flush()
- # TODO: Get the user specified parameters
-
self.results = {}
- self.results['nphe'] = r_length(uTraitsR)[0]
- self.results['nstr'] = r_length(uStrainsR)[0]
- self.results['phenotypes'] = uTraitsR
- self.results['strains'] = uStrainsR
- self.results['requestform'] = requestform
+ self.results['nphe'] = r_length(uTraitsR)[0] # Number of phenotypes/traits
+ self.results['nstr'] = r_length(uStrainsR)[0] # Number of strains
+ self.results['phenotypes'] = uTraitsR # Traits used
+ self.results['strains'] = uStrainsR # Strains used in the analysis
+ self.results['requestform'] = requestform # Store the user specified parameters for the output page
# Calculate soft threshold if the user specified the SoftThreshold variable
if requestform.get('SoftThresholds') is not None:
@@ -109,14 +107,14 @@ class WGCNA(object):
print "PowerEstimate: {}".format(self.sft[0])
self.results['PowerEstimate'] = self.sft[0]
if r_is_NA(self.sft[0]):
- self.results['Power'] = 1
+ self.results['Power'] = 1 # No power could be estimated
else:
- self.results['Power'] = self.sft[0][0]
+ self.results['Power'] = self.sft[0][0] # Use the estimated power
else:
- # The user clicked a button, so no soft threshold selection, just use the value the user gives
- self.results['Power'] = requestform.get('Power')
+ # The user clicked a button, so no soft threshold selection
+ self.results['Power'] = requestform.get('Power') # Use the power value the user gives
- # Create block wise modules using WGCNA
+ # Create the block wise modules using WGCNA
network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3)
# Save the network for the GUI
--
cgit v1.2.3
From 8f25786a98447039362f74b8a37dbeacf5f07156 Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Thu, 8 Oct 2015 11:38:29 +0200
Subject: Minor, adding a better check for NA
---
wqflask/wqflask/wgcna/wgcna_analysis.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index 1174ce47..ba806b6d 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -106,7 +106,8 @@ class WGCNA(object):
print "PowerEstimate: {}".format(self.sft[0])
self.results['PowerEstimate'] = self.sft[0]
- if r_is_NA(self.sft[0]):
+ if self.sft[0][0] == "NA":
+ print "No power is suitable for the analysis, just use 1"
self.results['Power'] = 1 # No power could be estimated
else:
self.results['Power'] = self.sft[0][0] # Use the estimated power
--
cgit v1.2.3
From bde835abcd76b495da82b4e107ac5e20e5663e05 Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Thu, 8 Oct 2015 11:42:27 +0200
Subject: Using the correct NA from Rpy2
---
wqflask/wqflask/wgcna/wgcna_analysis.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index ba806b6d..a714319d 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -106,7 +106,7 @@ class WGCNA(object):
print "PowerEstimate: {}".format(self.sft[0])
self.results['PowerEstimate'] = self.sft[0]
- if self.sft[0][0] == "NA":
+ if self.sft[0][0] is ri.NA_Integer:
print "No power is suitable for the analysis, just use 1"
self.results['Power'] = 1 # No power could be estimated
else:
--
cgit v1.2.3