aboutsummaryrefslogtreecommitdiff
path: root/gn2/wqflask/wgcna/wgcna_analysis.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/wqflask/wgcna/wgcna_analysis.py')
-rw-r--r--gn2/wqflask/wgcna/wgcna_analysis.py189
1 files changed, 189 insertions, 0 deletions
diff --git a/gn2/wqflask/wgcna/wgcna_analysis.py b/gn2/wqflask/wgcna/wgcna_analysis.py
new file mode 100644
index 00000000..f982c021
--- /dev/null
+++ b/gn2/wqflask/wgcna/wgcna_analysis.py
@@ -0,0 +1,189 @@
+"""
+WGCNA analysis for GN2
+
+Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+"""
+import base64
+import sys
+import rpy2.robjects as ro # R Objects
+import rpy2.rinterface as ri
+
+from array import array as arr
+from numpy import *
+from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR
+from rpy2.robjects.packages import importr
+
+from gn2.utility import webqtlUtil # Random number for the image
+from gn2.utility import helper_functions
+
+utils = importr("utils")
+
+# Get pointers to some common R functions
+r_library = ro.r["library"] # Map the library function
+r_options = ro.r["options"] # Map the options function
+r_read_csv = ro.r["read.csv"] # Map the read.csv function
+r_dim = ro.r["dim"] # Map the dim function
+r_c = ro.r["c"] # Map the c function
+r_cat = ro.r["cat"] # Map the cat function
+r_paste = ro.r["paste"] # Map the paste function
+r_unlist = ro.r["unlist"] # Map the unlist function
+r_unique = ro.r["unique"] # Map the unique function
+r_length = ro.r["length"] # Map the length function
+r_unlist = ro.r["unlist"] # Map the unlist function
+r_list = ro.r.list # Map the list function
+r_matrix = ro.r.matrix # Map the matrix function
+r_seq = ro.r["seq"] # Map the seq function
+r_table = ro.r["table"] # Map the table function
+r_names = ro.r["names"] # Map the names function
+r_sink = ro.r["sink"] # Map the sink function
+r_is_NA = ro.r["is.na"] # Map the is.na function
+r_file = ro.r["file"] # Map the file function
+r_png = ro.r["png"] # Map the png function for plotting
+r_dev_off = ro.r["dev.off"] # Map the dev.off function
+
+
+class WGCNA:
+ def __init__(self):
+ # To log output from stdout/stderr to a file add `r_sink(log)`
+ print("Initialization of WGCNA")
+
+ # Load WGCNA - Should only be done once, since it is quite expensive
+ r_library("WGCNA")
+ r_options(stringsAsFactors=False)
+ print("Initialization of WGCNA done, package loaded in R session")
+ # Map the enableWGCNAThreads function
+ self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"]
+ # Map the pickSoftThreshold function
+ self.r_pickSoftThreshold = ro.r["pickSoftThreshold"]
+ # Map the blockwiseModules function
+ self.r_blockwiseModules = ro.r["blockwiseModules"]
+ # Map the labels2colors function
+ self.r_labels2colors = ro.r["labels2colors"]
+ # Map the plotDendroAndColors function
+ self.r_plotDendroAndColors = ro.r["plotDendroAndColors"]
+ print("Obtained pointers to WGCNA functions")
+
+ def run_analysis(self, requestform):
+ print("Starting WGCNA analysis on dataset")
+ # Enable multi threading
+ self.r_enableWGCNAThreads()
+ self.trait_db_list = [trait.strip()
+ for trait in requestform['trait_list'].split(',')]
+ print(("Retrieved phenotype data from database",
+ requestform['trait_list']))
+ helper_functions.get_trait_db_obs(self, self.trait_db_list)
+
+ # self.input contains the phenotype values we need to send to R
+ self.input = {}
+ # All the strains we have data for (contains duplicates)
+ strains = []
+ # All the traits we have data for (should not contain duplicates)
+ traits = []
+ for trait in self.trait_list:
+ traits.append(trait[0].name)
+ self.input[trait[0].name] = {}
+ for strain in trait[0].data:
+ strains.append(strain)
+ self.input[trait[0].name][strain] = trait[0].data[strain].value
+
+ # Transfer the load data from python to R
+ # Unique strains in R vector
+ uStrainsR = r_unique(ro.Vector(strains))
+ uTraitsR = r_unique(ro.Vector(traits)) # Unique traits in R vector
+
+ r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
+ r_cat("The number of unique traits:", r_length(uTraitsR), "\n")
+
+ # rM is the datamatrix holding all the data in
+ # R /rows = strains columns = traits
+ rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(
+ uTraitsR), dimnames=r_list(uStrainsR, uTraitsR))
+ for t in uTraitsR:
+ # R uses vectors every single element is a vector
+ trait = t[0]
+ for s in uStrainsR:
+ # R uses vectors every single element is a vector
+ strain = s[0]
+ rM.rx[strain, trait] = self.input[trait].get(
+ strain) # Update the matrix location
+ sys.stdout.flush()
+
+ self.results = {}
+ # Number of phenotypes/traits
+ self.results['nphe'] = r_length(uTraitsR)[0]
+ self.results['nstr'] = r_length(
+ uStrainsR)[0] # Number of strains
+ self.results['phenotypes'] = uTraitsR # Traits used
+ # Strains used in the analysis
+ self.results['strains'] = uStrainsR
+ # Store the user specified parameters for the output page
+ self.results['requestform'] = requestform
+
+ # Calculate soft threshold if the user specified the
+ # SoftThreshold variable
+ if requestform.get('SoftThresholds') is not None:
+ powers = [int(threshold.strip())
+ for threshold in requestform['SoftThresholds'].rstrip().split(",")]
+ rpow = r_unlist(r_c(powers))
+ print(("SoftThresholds: {} == {}".format(powers, rpow)))
+ self.sft = self.r_pickSoftThreshold(
+ rM, powerVector=rpow, verbose=5)
+
+ print(("PowerEstimate: {}".format(self.sft[0])))
+ self.results['PowerEstimate'] = self.sft[0]
+ if self.sft[0][0] is ri.NA_Integer:
+ print("No power is suitable for the analysis, just use 1")
+ # No power could be estimated
+ self.results['Power'] = 1
+ else:
+ # Use the estimated power
+ self.results['Power'] = self.sft[0][0]
+ else:
+ # The user clicked a button, so no soft threshold selection
+ # Use the power value the user gives
+ self.results['Power'] = requestform.get('Power')
+
+ # Create the block wise modules using WGCNA
+ network = self.r_blockwiseModules(
+ rM,
+ power=self.results['Power'],
+ TOMType=requestform['TOMtype'],
+ minModuleSize=requestform['MinModuleSize'],
+ verbose=3)
+
+ # Save the network for the GUI
+ self.results['network'] = network
+
+ # How many modules and how many gene per module ?
+ print(("WGCNA found {} modules".format(r_table(network[1]))))
+ self.results['nmod'] = r_length(r_table(network[1]))[0]
+
+ # The iconic WCGNA plot of the modules in the hanging tree
+ self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
+ self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl']
+ r_png(self.results['imgloc'], width=1000, height=600, type='cairo-png')
+ mergedColors = self.r_labels2colors(network[1])
+ self.r_plotDendroAndColors(network[5][0], mergedColors,
+ "Module colors", dendroLabels=False,
+ hang=0.03, addGuide=True, guideHang=0.05)
+ r_dev_off()
+ sys.stdout.flush()
+
+ def render_image(self, results):
+ print(("pre-loading imgage results:", self.results['imgloc']))
+ imgfile = open(self.results['imgloc'], 'rb')
+ imgdata = imgfile.read()
+ imgB64 = base64.b64encode(imgdata)
+ bytesarray = arr('B', imgB64)
+ self.results['imgdata'] = bytesarray
+
+ def process_results(self, results):
+ print("Processing WGCNA output")
+ template_vars = {}
+ template_vars["input"] = self.input
+ # Results from the soft threshold analysis
+ template_vars["powers"] = self.sft[1:]
+ template_vars["results"] = self.results
+ self.render_image(results)
+ sys.stdout.flush()
+ return(dict(template_vars))