"""WGCNA analysis for GN2.

Author / Maintainer: Danny Arends
"""
import base64
import sys
import rpy2.robjects as ro  # R Objects
import rpy2.rinterface as ri

from array import array as arr
# NOTE(review): the star import may shadow Python builtins with numpy
# versions of the same name; it is kept because other code may rely on it.
from numpy import *
from gn2.base.webqtlConfig import GENERATED_IMAGE_DIR
from rpy2.robjects.packages import importr

from gn2.utility import webqtlUtil  # Random number for the image
from gn2.utility import helper_functions

utils = importr("utils")

# Get pointers to some common R functions
r_library = ro.r["library"]  # Map the library function
r_options = ro.r["options"]  # Map the options function
r_read_csv = ro.r["read.csv"]  # Map the read.csv function
r_dim = ro.r["dim"]  # Map the dim function
r_c = ro.r["c"]  # Map the c function
r_cat = ro.r["cat"]  # Map the cat function
r_paste = ro.r["paste"]  # Map the paste function
r_unlist = ro.r["unlist"]  # Map the unlist function
r_unique = ro.r["unique"]  # Map the unique function
r_length = ro.r["length"]  # Map the length function
r_list = ro.r.list  # Map the list function
r_matrix = ro.r.matrix  # Map the matrix function
r_seq = ro.r["seq"]  # Map the seq function
r_table = ro.r["table"]  # Map the table function
r_names = ro.r["names"]  # Map the names function
r_sink = ro.r["sink"]  # Map the sink function
r_is_NA = ro.r["is.na"]  # Map the is.na function
r_file = ro.r["file"]  # Map the file function
r_png = ro.r["png"]  # Map the png function for plotting
r_dev_off = ro.r["dev.off"]  # Map the dev.off function


class WGCNA:
    """Run a WGCNA network analysis in an embedded R session.

    Typical use is ``run_analysis(requestform)`` followed by
    ``process_results(results)``; the first fills ``self.input``,
    ``self.results`` and (optionally) ``self.sft``, the second packages
    them for the output page.
    """

    def __init__(self):
        """Load the WGCNA R package and map the R functions used later."""
        # To log output from stdout/stderr to a file add `r_sink(log)`
        print("Initialization of WGCNA")

        # Load WGCNA - Should only be done once, since it is quite expensive
        r_library("WGCNA")
        r_options(stringsAsFactors=False)
        print("Initialization of WGCNA done, package loaded in R session")
        # Map the enableWGCNAThreads function
        self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"]
        # Map the pickSoftThreshold function
        self.r_pickSoftThreshold = ro.r["pickSoftThreshold"]
        # Map the blockwiseModules function
        self.r_blockwiseModules = ro.r["blockwiseModules"]
        # Map the labels2colors function
        self.r_labels2colors = ro.r["labels2colors"]
        # Map the plotDendroAndColors function
        self.r_plotDendroAndColors = ro.r["plotDendroAndColors"]
        # Soft threshold result; stays None unless run_analysis is asked
        # to estimate one, so process_results can always read it.
        self.sft = None
        print("Obtained pointers to WGCNA functions")

    def run_analysis(self, requestform):
        """Build the trait matrix in R, run WGCNA and plot the dendrogram.

        ``requestform`` is read for the keys 'trait_list' (comma separated),
        'SoftThresholds' (optional, comma separated integers), 'Power',
        'TOMtype' and 'MinModuleSize'. Results are stored on
        ``self.input``, ``self.results`` and ``self.sft``; nothing is
        returned.
        """
        print("Starting WGCNA analysis on dataset")
        # Enable multi threading
        self.r_enableWGCNAThreads()
        # Forget any soft threshold result from a previous run
        self.sft = None
        self.trait_db_list = [trait.strip()
                              for trait in requestform['trait_list'].split(',')]
        print(("Retrieved phenotype data from database",
               requestform['trait_list']))
        # Presumably populates self.trait_list (read by the loop below);
        # the helper is defined outside this file.
        helper_functions.get_trait_db_obs(self, self.trait_db_list)

        # self.input contains the phenotype values we need to send to R
        self.input = {}
        # All the strains we have data for (contains duplicates)
        strains = []
        # All the traits we have data for (should not contain duplicates)
        traits = []
        for trait in self.trait_list:
            trait_obj = trait[0]
            traits.append(trait_obj.name)
            self.input[trait_obj.name] = {}
            for strain in trait_obj.data:
                strains.append(strain)
                self.input[trait_obj.name][strain] = (
                    trait_obj.data[strain].value)

        # Transfer the load data from python to R
        # Unique strains in R vector
        uStrainsR = r_unique(ro.Vector(strains))
        # Unique traits in R vector
        uTraitsR = r_unique(ro.Vector(traits))

        r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
        r_cat("The number of unique traits:", r_length(uTraitsR), "\n")

        # rM is the datamatrix holding all the data in
        # R /rows = strains columns = traits
        rM = ro.r.matrix(ri.NA_Real,
                         nrow=r_length(uStrainsR),
                         ncol=r_length(uTraitsR),
                         dimnames=r_list(uStrainsR, uTraitsR))
        for t in uTraitsR:
            # R uses vectors every single element is a vector
            trait = t[0]
            for s in uStrainsR:
                # R uses vectors every single element is a vector
                strain = s[0]
                value = self.input[trait].get(strain)
                if value is None:
                    # No observation for this strain/trait pair: the cell
                    # already holds NA_Real from the matrix initialisation.
                    continue
                rM.rx[strain, trait] = value  # Update the matrix location
                sys.stdout.flush()

        self.results = {}
        # Number of phenotypes/traits
        self.results['nphe'] = r_length(uTraitsR)[0]
        self.results['nstr'] = r_length(uStrainsR)[0]  # Number of strains
        self.results['phenotypes'] = uTraitsR  # Traits used
        # Strains used in the analysis
        self.results['strains'] = uStrainsR
        # Store the user specified parameters for the output page
        self.results['requestform'] = requestform

        # Calculate soft threshold if the user specified the
        # SoftThreshold variable
        if requestform.get('SoftThresholds') is not None:
            powers = [int(threshold.strip())
                      for threshold
                      in requestform['SoftThresholds'].rstrip().split(",")]
            rpow = r_unlist(r_c(powers))
            print(("SoftThresholds: {} == {}".format(powers, rpow)))
            self.sft = self.r_pickSoftThreshold(
                rM, powerVector=rpow, verbose=5)

            print(("PowerEstimate: {}".format(self.sft[0])))
            self.results['PowerEstimate'] = self.sft[0]
            # Let R decide whether the estimate is NA instead of relying on
            # the identity of one specific rpy2 NA singleton.
            if r_is_NA(self.sft[0])[0]:
                print("No power is suitable for the analysis, just use 1")
                # No power could be estimated
                self.results['Power'] = 1
            else:
                # Use the estimated power
                self.results['Power'] = self.sft[0][0]
        else:
            # The user clicked a button, so no soft threshold selection
            # Use the power value the user gives
            # NOTE(review): passed to R exactly as found in requestform
            # (as are TOMtype and MinModuleSize below) - confirm the caller
            # supplies values of the type blockwiseModules expects.
            self.results['Power'] = requestform.get('Power')

        # Create the block wise modules using WGCNA
        network = self.r_blockwiseModules(
            rM,
            power=self.results['Power'],
            TOMType=requestform['TOMtype'],
            minModuleSize=requestform['MinModuleSize'],
            verbose=3)

        # Save the network for the GUI
        self.results['network'] = network

        # How many modules and how many gene per module ?
        # NOTE(review): network[1] is used as the module labels and
        # network[5][0] as the first dendrogram - confirm against the
        # blockwiseModules return value.
        print(("WGCNA found {} modules".format(r_table(network[1]))))
        self.results['nmod'] = r_length(r_table(network[1]))[0]

        # The iconic WCGNA plot of the modules in the hanging tree
        self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
        self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl']
        r_png(self.results['imgloc'], width=1000, height=600, type='cairo-png')
        try:
            mergedColors = self.r_labels2colors(network[1])
            self.r_plotDendroAndColors(network[5][0], mergedColors,
                                       "Module colors", dendroLabels=False,
                                       hang=0.03, addGuide=True,
                                       guideHang=0.05)
        finally:
            # Always close the graphics device, even when plotting fails,
            # so it does not stay open in the R session.
            r_dev_off()
        sys.stdout.flush()

    def render_image(self, results):
        """Store the plot as base64 data in ``self.results['imgdata']``.

        The ``results`` argument is not used; the image path is read from
        ``self.results['imgloc']``.
        """
        print(("pre-loading imgage results:", self.results['imgloc']))
        # The context manager closes the file handle after reading
        with open(self.results['imgloc'], 'rb') as imgfile:
            imgdata = imgfile.read()
        imgB64 = base64.b64encode(imgdata)
        bytesarray = arr('B', imgB64)
        self.results['imgdata'] = bytesarray

    def process_results(self, results):
        """Return the template variables for the output page.

        The returned dict has the keys 'input', 'powers' and 'results'.
        'powers' is ``self.sft[1:]`` when a soft threshold analysis was
        run and ``None`` otherwise. The ``results`` argument is only
        forwarded to ``render_image``.
        """
        print("Processing WGCNA output")
        template_vars = {}
        template_vars["input"] = self.input
        # Results from the soft threshold analysis; absent when the user
        # supplied the power directly.
        sft = getattr(self, "sft", None)
        template_vars["powers"] = sft[1:] if sft is not None else None
        template_vars["results"] = self.results
        self.render_image(results)
        sys.stdout.flush()
        return dict(template_vars)