diff options
-rw-r--r-- | misc/notes_DA.txt | 10 | ||||
-rwxr-xr-x | wqflask/base/data_set.py | 2 | ||||
-rwxr-xr-x | wqflask/wqflask/templates/collections/view.html | 12 | ||||
-rw-r--r-- | wqflask/wqflask/templates/wgcna_results.html | 76 | ||||
-rw-r--r-- | wqflask/wqflask/templates/wgcna_setup.html | 28 | ||||
-rwxr-xr-x | wqflask/wqflask/views.py | 21 | ||||
-rwxr-xr-x | wqflask/wqflask/wgcna/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/wqflask/wgcna/wgcna_analysis.py | 156 |
8 files changed, 302 insertions, 3 deletions
diff --git a/misc/notes_DA.txt b/misc/notes_DA.txt new file mode 100644 index 00000000..410e0182 --- /dev/null +++ b/misc/notes_DA.txt @@ -0,0 +1,10 @@ +Danny's notes about the genenetwork source + +Location of static files: + +Location of HTML templates: wqflask/wqflask/templates/ + +Entry point of the wqflask app: wqflask/wqflask/__init__.py + +Application routes: wqflask/wqflask/views.py + diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index e2db9ad7..427fd991 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -89,7 +89,7 @@ class Dataset_Types(object): for group in data['datasets'][species]: for dataset_type in data['datasets'][species][group]: for dataset in data['datasets'][species][group][dataset_type]: - print("dataset is:", dataset) + #print("dataset is:", dataset) short_dataset_name = dataset[0] if dataset_type == "Phenotypes": diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html index 29c65058..5450f903 100755 --- a/wqflask/wqflask/templates/collections/view.html +++ b/wqflask/wqflask/templates/collections/view.html @@ -38,6 +38,18 @@ <input type="submit" class="btn btn-primary" value="Correlation Matrix" /> </div> </form> + <form action="/wgcna_setup" method="post"> + {% if uc %} + <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" /> + {% endif %} + <input type="hidden" name="trait_list" id="trait_list" value= " + {% for this_trait in trait_obs %} + {{ this_trait.name }}:{{ this_trait.dataset.name }}, + {% endfor %}" > + <div class="col-xs-2 controls"> + <input type="submit" class="btn btn-primary" value="WGCNA Analysis" /> + </div> + </form> <form action="/heatmap" method="post"> {% if uc %} <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" /> diff --git a/wqflask/wqflask/templates/wgcna_results.html b/wqflask/wqflask/templates/wgcna_results.html new file mode 100644 index 00000000..0dc030b1 --- /dev/null +++ b/wqflask/wqflask/templates/wgcna_results.html @@ -0,0 +1,76 @@ +{% extends "base.html" %} +{% block title %}WCGNA results{% endblock %} + +{% block content %} <!-- Start of body --> + <div class="container"> + <h1>WGCNA Results</h1> + Analysis found {{results['nmod']}} modules when scanning {{results['nphe']}} phenotypes, measured on {{results['nstr']}} strains.<br> + Additional parameters settings: + <ul> + <li>Soft thresholds checked = {{results['requestform']['SoftThresholds']}}</li> + <li>Power used for this analysis = {{results['Power']}}</li> + <li>TomType = {{results['requestform']['TOMtype']}}</li> + <li>Minimum module size = {{results['requestform']['MinModuleSize'] }}</li> + <li>mergeCutHeight = {{results['requestform']['mergeCutHeight'] }}</li> + </ul> + + <h3>Soft threshold table</h3> + <table width="80%"> + <tr><th>Power</th><th>SFT.R.sq</th><th>slope</th><th>truncated.R.sq</th><th>mean.k</th><th>median.k</th><th>max.k</th><th>Analysis</th></tr> + {% for r in range(powers[0][0]|length) %} + {% if powers[0][1][r] > 0.85 %} + <tr style="color: #00ff00;"> + {% elif powers[0][1][r] > 0.75 %} + <tr style="color: #aaaa00;"> + {% else %} + <tr style="color: #ff0000;"> + {% endif %} + {% for c in range(powers[0]|length) %} + <td>{{powers[0][c][r]|round(3)}}</td> + {% endfor %} + <td style="color: #000000;"> + {% if powers[0][1][r] > 0.75 %} + <input type="submit" value="Redo use power = {{powers[0][0][r]}}" /></td> + {% endif %} + </tr> + {% endfor %} + </table> + <h3>WGCNA module plot</h3> + <a href="/tmp/{{ results['imgurl'] }}"> + <img alt="Embedded Image" src="data:image/png;base64, + {% for elem in results['imgdata'] -%} + {% print("%c"|format(elem)) %} + {%- endfor %} + " /></a> + + + <h3>Phenotype / Module table</h3> + <table width="80%"> + <tr><th>Phenotype</th><th>Module</th></tr> + {% for r in range(results['nphe']) %} + <tr> + <td>{{results['phenotypes'][r][0]}}</td> + <td>{{results['network'][0][r]}}</td> + </tr> + {% endfor %} + </table> + + <h3>Module eigen genes</h3> + <table width="80%"> + <tr><th>Phenotype</th> + {% for m in range(results['nmod']) %} + <th><input type="submit" value="Add module {{m}} to collection" /></th> + {% endfor %} + </tr> + {% for r in range(results['nstr']) %} + <tr> + <td>{{results['strains'][r][0]}}</td> + {% for m in range(results['nmod']) %} + <td>{{results['network'][2][m][r]}}</td> + {% endfor %} + </tr> + {% endfor %} + </table> + </div> +{% endblock %} + diff --git a/wqflask/wqflask/templates/wgcna_setup.html b/wqflask/wqflask/templates/wgcna_setup.html new file mode 100644 index 00000000..49181938 --- /dev/null +++ b/wqflask/wqflask/templates/wgcna_setup.html @@ -0,0 +1,28 @@ +{% extends "base.html" %} +{% block title %}WCGNA analysis{% endblock %} + +{% block content %} <!-- Start of body --> +<form action="/wgcna_results" method="post"> + <div class="container"> + + <h1>WGCNA analysis parameters</h1> + {% if request.form['trait_list'].split(",")|length <= 4 %} + + <h2 style="color: #ff0000;">ERROR: Too few phenotypes as input</h2> + Please make sure you select enough phenotypes / genes to perform WGCNA, + your collection needs to contain at least 4 different phenotypes. You provided {{request.form['trait_list'].split(',')|length}} phenotypes as input + + {% else %} + <input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}"> + <table> + <tr><td>Soft threshold:</td><td><input type="text" class="form-inline" name="SoftThresholds" id="SoftThresholds" value="1,2,3,4,5,6,7,8,9"></td></tr> + <tr><td>Minimum module size:</td><td><input type="text" class="form-inline" name="MinModuleSize" id="MinModuleSize" value="30"></td></tr> + <tr><td>TOMtype:</td><td><input type="text" class="form-inline" name="TOMtype" id="TOMtype" value="unsigned"></td></tr> + <tr><td>mergeCutHeight:</td><td><input type="text" class="form-inline" name="mergeCutHeight" id="mergeCutHeight" value="0.25"></td></tr> + </table> + <input type="submit" class="btn btn-primary" value="Run WGCNA using these settings" /> + {% endif %} + </div> +</form> +{% endblock %} + diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 7a8a0f03..dc199ab2 100755 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -46,6 +46,9 @@ from wqflask.interval_mapping import interval_mapping from wqflask.correlation import show_corr_results from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot + +from wqflask.wgcna import wgcna_analysis + from utility import temp_data from base import webqtlFormData @@ -93,7 +96,7 @@ def tmp_page(img_path): print("img_path:", img_path) initial_start_vars = request.form print("initial_start_vars:", initial_start_vars) - imgfile = open('/home/zas1024/tmp/' + img_path, 'rb') + imgfile = open(webqtlConfig.TMPDIR + img_path, 'rb') imgdata = imgfile.read() imgB64 = imgdata.encode("base64") bytesarray = array.array('B', imgB64) @@ -174,6 +177,20 @@ def help(): doc = docs.Docs("help") return render_template("docs.html", **doc.__dict__) +@app.route("/wgcna_setup", methods=('POST',)) +def wcgna_setup(): + print("In wgcna, request.form is:", request.form) # We are going to get additional user input for the analysis + return render_template("wgcna_setup.html", **request.form) # Display them using the template + +@app.route("/wgcna_results", methods=('POST',)) +def wcgna_results(): + print("In wgcna, request.form is:", request.form) + wgcna = wgcna_analysis.WGCNA() # Start R, load the package and pointers and create the analysis + wgcnaA = wgcna.run_analysis(request.form) # Start the analysis, a wgcnaA object should be a separate long running thread + result = wgcna.process_results(wgcnaA) # After the analysis is finished store the result + return render_template("wgcna_results.html", **result) # Display them using the template + + @app.route("/news") def news_route(): newsobject = news.News() @@ -328,7 +345,7 @@ def marker_regression_page(): 'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno' ) - print("Random Print too see if it is running:", initial_start_vars) + print("Marker regression called with initial_start_vars:", initial_start_vars) start_vars = {} for key, value in initial_start_vars.iteritems(): if key in wanted or key.startswith(('value:')): diff --git a/wqflask/wqflask/wgcna/__init__.py b/wqflask/wqflask/wgcna/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/wqflask/wqflask/wgcna/__init__.py diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py new file mode 100644 index 00000000..f23b1417 --- /dev/null +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -0,0 +1,156 @@ +# WGCNA analysis for GN2 +# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> +import sys +from numpy import * +import scipy as sp # SciPy +import rpy2.robjects as ro # R Objects +import rpy2.rinterface as ri + +from base import webqtlConfig # For paths and stuff +from utility import webqtlUtil # Random number for the image + +import base64 +import array + +from utility import helper_functions + +from rpy2.robjects.packages import importr +utils = importr("utils") + +## Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_read_csv = ro.r["read.csv"] # Map the read.csv function +r_dim = ro.r["dim"] # Map the dim function +r_c = ro.r["c"] # Map the c function +r_cat = ro.r["cat"] # Map the cat function +r_paste = ro.r["paste"] # Map the paste function +r_unlist = ro.r["unlist"] # Map the unlist function +r_unique = ro.r["unique"] # Map the unique function +r_length = ro.r["length"] # Map the length function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_matrix = ro.r.matrix # Map the matrix function +r_seq = ro.r["seq"] # Map the seq function +r_table = ro.r["table"] # Map the table function +r_names = ro.r["names"] # Map the names function +r_sink = ro.r["sink"] # Map the sink function +r_is_NA = ro.r["is.na"] # Map the is.na function +r_file = ro.r["file"] # Map the file function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function + +class WGCNA(object): + def __init__(self): + print("Initialization of WGCNA") + #log = r_file("/tmp/genenetwork_wcgna.log", open = "wt") + #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file + #r_sink(log, type = "message") + r_library("WGCNA") # Load WGCNA - Should only be done once, since it is quite expensive + r_options(stringsAsFactors = False) + print("Initialization of WGCNA done, package loaded in R session") + self.r_enableWGCNAThreads = ro.r["enableWGCNAThreads"] # Map the enableWGCNAThreads function + self.r_pickSoftThreshold = ro.r["pickSoftThreshold"] # Map the pickSoftThreshold function + self.r_blockwiseModules = ro.r["blockwiseModules"] # Map the blockwiseModules function + self.r_labels2colors = ro.r["labels2colors"] # Map the labels2colors function + self.r_plotDendroAndColors = ro.r["plotDendroAndColors"] # Map the plotDendroAndColors function + print("Obtained pointers to WGCNA functions") + + def run_analysis(self, requestform): + print("Starting WGCNA analysis on dataset") + self.r_enableWGCNAThreads() # Enable multi threading + self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] + print("Retrieved phenotype data from database", requestform['trait_list']) + helper_functions.get_trait_db_obs(self, self.trait_db_list) + + self.input = {} # self.input contains the phenotype values we need to send to R + strains = [] # All the strains we have data for (contains duplicates) + traits = [] # All the traits we have data for (should not contain duplicates) + for trait in self.trait_list: + traits.append(trait[0].name) + self.input[trait[0].name] = {} + for strain in trait[0].data: + strains.append(strain) + self.input[trait[0].name][strain] = trait[0].data[strain].value + + # Transfer the load data from python to R + uStrainsR = r_unique(ro.Vector(strains)) # Unique strains in R vector + uTraitsR = r_unique(ro.Vector(traits)) # Unique traits in R vector + + r_cat("The number of unique strains:", r_length(uStrainsR), "\n") + r_cat("The number of unique traits:", r_length(uTraitsR), "\n") + + # rM is the datamatrix holding all the data in R /rows = strains columns = traits + rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR)) + for t in uTraitsR: + trait = t[0] # R uses vectors every single element is a vector + for s in uStrainsR: + strain = s[0] # R uses vectors every single element is a vector + rM.rx[strain, trait] = self.input[trait].get(strain) # Update the matrix location + #DEBUG: print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0]) + sys.stdout.flush() + + self.results = {} + self.results['nphe'] = r_length(uTraitsR)[0] # Number of phenotypes/traits + self.results['nstr'] = r_length(uStrainsR)[0] # Number of strains + self.results['phenotypes'] = uTraitsR # Traits used + self.results['strains'] = uStrainsR # Strains used in the analysis + self.results['requestform'] = requestform # Store the user specified parameters for the output page + + # Calculate soft threshold if the user specified the SoftThreshold variable + if requestform.get('SoftThresholds') is not None: + powers = [int(threshold.strip()) for threshold in requestform['SoftThresholds'].rstrip().split(",")] + rpow = r_unlist(r_c(powers)) + print "SoftThresholds: {} == {}".format(powers, rpow) + self.sft = self.r_pickSoftThreshold(rM, powerVector = rpow, verbose = 5) + + print "PowerEstimate: {}".format(self.sft[0]) + self.results['PowerEstimate'] = self.sft[0] + if self.sft[0][0] is ri.NA_Integer: + print "No power is suitable for the analysis, just use 1" + self.results['Power'] = 1 # No power could be estimated + else: + self.results['Power'] = self.sft[0][0] # Use the estimated power + else: + # The user clicked a button, so no soft threshold selection + self.results['Power'] = requestform.get('Power') # Use the power value the user gives + + # Create the block wise modules using WGCNA + network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3) + + # Save the network for the GUI + self.results['network'] = network + + # How many modules and how many gene per module ? + print "WGCNA found {} modules".format(r_table(network[1])) + self.results['nmod'] = r_length(r_table(network[1]))[0] + + # The iconic WCGNA plot of the modules in the hanging tree + self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png" + self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl'] + r_png(self.results['imgloc'], width=1000, height=600) + mergedColors = self.r_labels2colors(network[1]) + self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05) + r_dev_off() + sys.stdout.flush() + + def render_image(self, results): + print("pre-loading imgage results:", self.results['imgloc']) + imgfile = open(self.results['imgloc'], 'rb') + imgdata = imgfile.read() + imgB64 = imgdata.encode("base64") + bytesarray = array.array('B', imgB64) + self.results['imgdata'] = bytesarray + + def process_results(self, results): + print("Processing WGCNA output") + template_vars = {} + template_vars["input"] = self.input + template_vars["powers"] = self.sft[1:] # Results from the soft threshold analysis + template_vars["results"] = self.results + self.render_image(results) + sys.stdout.flush() + #r_sink(type = "message") # This restores R output to the stdout/stderr + #r_sink() # We should end the Rpy session more or less + return(dict(template_vars)) + |