Merge branch 'master' of github.com:zsloan/genenetwork2

author: zsloan 2015-10-09 22:08:17 +0000
committer: zsloan 2015-10-09 22:08:17 +0000
commit: 01fb1212408818512a729f6bd00a203defa5290c (patch)
tree: e608fd0f61d07abc61a33194d6620b0fbc12ba1f
parent: fb40bfa27aef7dd088bf2467d0845e8935444c4b (diff)
parent: a9910ed7c5b1b0959ae2a39872ff90a3a76d4e9e (diff)
download: genenetwork2-01fb1212408818512a729f6bd00a203defa5290c.tar.gz
8 files changed, 302 insertions, 3 deletions
diff --git a/misc/notes_DA.txt b/misc/notes_DA.txt
new file mode 100644
index 00000000..410e0182
--- /dev/null
+++ b/misc/notes_DA.txt
@@ -0,0 +1,10 @@
+Danny's notes about the genenetwork source
+
+Location of static files:
+
+Location of HTML templates:       wqflask/wqflask/templates/
+
+Entry point of the wqflask app:   wqflask/wqflask/__init__.py
+
+Application routes:               wqflask/wqflask/views.py
+
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index e2db9ad7..427fd991 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -89,7 +89,7 @@ class Dataset_Types(object):
             for group in data['datasets'][species]:
                 for dataset_type in data['datasets'][species][group]:
                     for dataset in data['datasets'][species][group][dataset_type]:
-                        print("dataset is:", dataset)
+                        #print("dataset is:", dataset)
                         
                         short_dataset_name = dataset[0]
                         if dataset_type == "Phenotypes":
diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html
index 29c65058..5450f903 100755
--- a/wqflask/wqflask/templates/collections/view.html
+++ b/wqflask/wqflask/templates/collections/view.html
@@ -38,6 +38,18 @@
                     <input type="submit" class="btn btn-primary" value="Correlation Matrix" />
                 </div>
             </form>
+            <form action="/wgcna_setup" method="post">
+                {% if uc %}
+                <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" />
+                {% endif %}
+                <input type="hidden" name="trait_list" id="trait_list" value= "
+                {% for this_trait in trait_obs %}
+                    {{ this_trait.name }}:{{ this_trait.dataset.name }},
+                {% endfor %}" >
+                <div class="col-xs-2 controls">
+                    <input type="submit" class="btn btn-primary" value="WGCNA Analysis" />
+                </div>
+            </form>
             <form action="/heatmap" method="post">
                 {% if uc %}
                 <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" />
diff --git a/wqflask/wqflask/templates/wgcna_results.html b/wqflask/wqflask/templates/wgcna_results.html
new file mode 100644
index 00000000..0dc030b1
--- /dev/null
+++ b/wqflask/wqflask/templates/wgcna_results.html
@@ -0,0 +1,76 @@
+{% extends "base.html" %}
+{% block title %}WCGNA results{% endblock %}
+
+{% block content %} <!-- Start of body -->
+  <div class="container">
+    <h1>WGCNA Results</h1>
+    Analysis found {{results['nmod']}} modules when scanning {{results['nphe']}} phenotypes, measured on {{results['nstr']}} strains.<br>
+    Additional parameters settings:
+    <ul>
+      <li>Soft thresholds checked = {{results['requestform']['SoftThresholds']}}</li>
+      <li>Power used for this analysis = {{results['Power']}}</li>
+      <li>TomType = {{results['requestform']['TOMtype']}}</li>
+      <li>Minimum module size = {{results['requestform']['MinModuleSize'] }}</li>
+      <li>mergeCutHeight = {{results['requestform']['mergeCutHeight'] }}</li>
+    </ul>
+
+    <h3>Soft threshold table</h3>
+    <table width="80%">
+      <tr><th>Power</th><th>SFT.R.sq</th><th>slope</th><th>truncated.R.sq</th><th>mean.k</th><th>median.k</th><th>max.k</th><th>Analysis</th></tr>
+      {% for r in range(powers[0][0]|length) %}
+        {% if powers[0][1][r] > 0.85 %}
+        <tr style="color: #00ff00;">
+        {% elif powers[0][1][r] > 0.75 %}
+        <tr style="color: #aaaa00;">
+        {% else %}
+        <tr style="color: #ff0000;">
+        {% endif %}
+        {% for c in range(powers[0]|length) %}
+        <td>{{powers[0][c][r]|round(3)}}</td>
+       {% endfor %}
+        <td style="color: #000000;">
+          {% if powers[0][1][r] > 0.75 %}
+          <input type="submit" value="Redo use power = {{powers[0][0][r]}}" /></td>
+          {% endif %}
+        </tr>
+    {% endfor %}
+    </table>
+    <h3>WGCNA module plot</h3>
+                <a href="/tmp/{{ results['imgurl'] }}">
+                    <img alt="Embedded Image" src="data:image/png;base64,
+                    {% for elem in results['imgdata'] -%}
+                    {% print("%c"|format(elem)) %}
+                    {%- endfor %}
+                    " /></a>
+
+
+    <h3>Phenotype / Module table</h3>
+    <table width="80%">
+      <tr><th>Phenotype</th><th>Module</th></tr>
+      {% for r in range(results['nphe']) %}
+        <tr>
+          <td>{{results['phenotypes'][r][0]}}</td>
+          <td>{{results['network'][0][r]}}</td>
+        </tr>
+    {% endfor %}
+    </table>
+
+    <h3>Module eigen genes</h3>
+    <table width="80%">
+      <tr><th>Phenotype</th>
+      {% for m in range(results['nmod']) %}
+        <th><input type="submit" value="Add module {{m}} to collection" /></th>
+      {% endfor %}
+      </tr>
+      {% for r in range(results['nstr']) %}
+        <tr>
+          <td>{{results['strains'][r][0]}}</td>
+          {% for m in range(results['nmod']) %}
+          <td>{{results['network'][2][m][r]}}</td>
+          {% endfor %}
+        </tr>
+    {% endfor %}
+    </table>
+  </div>
+{% endblock %}
+
diff --git a/wqflask/wqflask/templates/wgcna_setup.html b/wqflask/wqflask/templates/wgcna_setup.html
new file mode 100644
index 00000000..49181938
--- /dev/null
+++ b/wqflask/wqflask/templates/wgcna_setup.html
@@ -0,0 +1,28 @@
+{% extends "base.html" %}
+{% block title %}WCGNA analysis{% endblock %}
+
+{% block content %} <!-- Start of body -->
+<form action="/wgcna_results" method="post">
+  <div class="container">
+
+    <h1>WGCNA analysis parameters</h1>
+    {% if request.form['trait_list'].split(",")|length <= 4 %}
+
+      <h2 style="color: #ff0000;">ERROR: Too few phenotypes as input</h2>
+      Please make sure you select enough phenotypes / genes to perform WGCNA, 
+      your collection needs to contain at least 4 different phenotypes. You provided {{request.form['trait_list'].split(',')|length}} phenotypes as input
+
+    {% else %}
+    <input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}">
+    <table>
+      <tr><td>Soft threshold:</td><td><input type="text" class="form-inline" name="SoftThresholds" id="SoftThresholds" value="1,2,3,4,5,6,7,8,9"></td></tr>
+      <tr><td>Minimum module size:</td><td><input type="text" class="form-inline" name="MinModuleSize" id="MinModuleSize" value="30"></td></tr>
+      <tr><td>TOMtype:</td><td><input type="text" class="form-inline" name="TOMtype" id="TOMtype" value="unsigned"></td></tr>
+      <tr><td>mergeCutHeight:</td><td><input type="text" class="form-inline" name="mergeCutHeight" id="mergeCutHeight" value="0.25"></td></tr>
+    </table>
+    <input type="submit" class="btn btn-primary" value="Run WGCNA using these settings" />
+    {% endif %}
+  </div>
+</form>
+{% endblock %}
+
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 7a8a0f03..dc199ab2 100755
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -46,6 +46,9 @@ from wqflask.interval_mapping import interval_mapping
 from wqflask.correlation import show_corr_results
 from wqflask.correlation_matrix import show_corr_matrix
 from wqflask.correlation import corr_scatter_plot
+
+from wqflask.wgcna import wgcna_analysis
+
 from utility import temp_data
 
 from base import webqtlFormData
@@ -93,7 +96,7 @@ def tmp_page(img_path):
     print("img_path:", img_path)
     initial_start_vars = request.form
     print("initial_start_vars:", initial_start_vars)
-    imgfile = open('/home/zas1024/tmp/' + img_path, 'rb')
+    imgfile = open(webqtlConfig.TMPDIR + img_path, 'rb')
     imgdata = imgfile.read()
     imgB64 = imgdata.encode("base64")
     bytesarray = array.array('B', imgB64)
@@ -174,6 +177,20 @@ def help():
     doc = docs.Docs("help")
     return render_template("docs.html", **doc.__dict__)
 
+@app.route("/wgcna_setup", methods=('POST',))
+def wcgna_setup():
+    print("In wgcna, request.form is:", request.form)             # We are going to get additional user input for the analysis
+    return render_template("wgcna_setup.html", **request.form)          # Display them using the template
+
+@app.route("/wgcna_results", methods=('POST',))
+def wcgna_results():
+    print("In wgcna, request.form is:", request.form)
+    wgcna = wgcna_analysis.WGCNA()                                # Start R, load the package and pointers and create the analysis
+    wgcnaA = wgcna.run_analysis(request.form)                     # Start the analysis, a wgcnaA object should be a separate long running thread
+    result = wgcna.process_results(wgcnaA)                        # After the analysis is finished store the result
+    return render_template("wgcna_results.html", **result)        # Display them using the template
+
+
 @app.route("/news")
 def news_route():
     newsobject = news.News()
@@ -328,7 +345,7 @@ def marker_regression_page():
         'mapmethod_rqtl_geno',
         'mapmodel_rqtl_geno'
     )
-    print("Random Print too see if it is running:", initial_start_vars)
+    print("Marker regression called with initial_start_vars:", initial_start_vars)
     start_vars = {}
     for key, value in initial_start_vars.iteritems():
         if key in wanted or key.startswith(('value:')):
diff --git a/wqflask/wqflask/wgcna/__init__.py b/wqflask/wqflask/wgcna/__init__.py
new file mode 100755
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/wqflask/wgcna/__init__.py
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
new file mode 100644
index 00000000..f23b1417
--- /dev/null
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -0,0 +1,156 @@
+# WGCNA analysis for GN2
+# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+import sys
+from numpy import *
+import scipy as sp                            # SciPy
+import rpy2.robjects as ro                    # R Objects
+import rpy2.rinterface as ri
+
+from base import webqtlConfig                 # For paths and stuff
+from utility import webqtlUtil                # Random number for the image
+
+import base64
+import array
+
+from utility import helper_functions
+
+from rpy2.robjects.packages import importr
+utils = importr("utils")
+
+## Get pointers to some common R functions
+r_library       = ro.r["library"]             # Map the library function
+r_options       = ro.r["options"]             # Map the options function
+r_read_csv      = ro.r["read.csv"]            # Map the read.csv function
+r_dim           = ro.r["dim"]                 # Map the dim function
+r_c             = ro.r["c"]                   # Map the c function
+r_cat           = ro.r["cat"]                 # Map the cat function
+r_paste         = ro.r["paste"]               # Map the paste function
+r_unlist        = ro.r["unlist"]              # Map the unlist function
+r_unique        = ro.r["unique"]              # Map the unique function
+r_length        = ro.r["length"]              # Map the length function
+r_unlist        = ro.r["unlist"]              # Map the unlist function
+r_list          = ro.r.list                   # Map the list function
+r_matrix        = ro.r.matrix                 # Map the matrix function
+r_seq           = ro.r["seq"]                 # Map the seq function
+r_table         = ro.r["table"]               # Map the table function
+r_names         = ro.r["names"]               # Map the names function
+r_sink          = ro.r["sink"]                # Map the sink function
+r_is_NA         = ro.r["is.na"]               # Map the is.na function
+r_file          = ro.r["file"]                # Map the file function
+r_png           = ro.r["png"]                 # Map the png function for plotting
+r_dev_off       = ro.r["dev.off"]             # Map the dev.off function
+
+class WGCNA(object):
+    def __init__(self):
+        print("Initialization of WGCNA")
+        #log = r_file("/tmp/genenetwork_wcgna.log", open = "wt")
+        #r_sink(log)                                  # Uncomment the r_sink() commands to log output from stdout/stderr to a file
+        #r_sink(log, type = "message")
+        r_library("WGCNA")                            # Load WGCNA - Should only be done once, since it is quite expensive
+        r_options(stringsAsFactors = False)
+        print("Initialization of WGCNA done, package loaded in R session")
+        self.r_enableWGCNAThreads    = ro.r["enableWGCNAThreads"]        # Map the enableWGCNAThreads function
+        self.r_pickSoftThreshold     = ro.r["pickSoftThreshold"]         # Map the pickSoftThreshold function
+        self.r_blockwiseModules      = ro.r["blockwiseModules"]          # Map the blockwiseModules function
+        self.r_labels2colors         = ro.r["labels2colors"]             # Map the labels2colors function
+        self.r_plotDendroAndColors   = ro.r["plotDendroAndColors"]       # Map the plotDendroAndColors function
+        print("Obtained pointers to WGCNA functions")
+
+    def run_analysis(self, requestform):
+        print("Starting WGCNA analysis on dataset")
+        self.r_enableWGCNAThreads()                                      # Enable multi threading
+        self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
+        print("Retrieved phenotype data from database", requestform['trait_list'])
+        helper_functions.get_trait_db_obs(self, self.trait_db_list)
+
+        self.input = {}           # self.input contains the phenotype values we need to send to R
+        strains = []              # All the strains we have data for (contains duplicates)
+        traits  = []              # All the traits we have data for (should not contain duplicates)
+        for trait in self.trait_list:
+            traits.append(trait[0].name)
+            self.input[trait[0].name] = {}
+            for strain in trait[0].data:
+                strains.append(strain)
+                self.input[trait[0].name][strain]  = trait[0].data[strain].value
+
+        # Transfer the load data from python to R
+        uStrainsR = r_unique(ro.Vector(strains))    # Unique strains in R vector
+        uTraitsR = r_unique(ro.Vector(traits))      # Unique traits in R vector
+
+        r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
+        r_cat("The number of unique traits:", r_length(uTraitsR), "\n")
+
+        # rM is the datamatrix holding all the data in R /rows = strains columns = traits
+        rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR))
+        for t in uTraitsR:
+            trait = t[0]                  # R uses vectors every single element is a vector
+            for s in uStrainsR:
+                strain = s[0]             # R uses vectors every single element is a vector
+                rM.rx[strain, trait] = self.input[trait].get(strain)  # Update the matrix location
+                #DEBUG: print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
+                sys.stdout.flush()
+
+        self.results = {}
+        self.results['nphe'] = r_length(uTraitsR)[0]          # Number of phenotypes/traits
+        self.results['nstr'] = r_length(uStrainsR)[0]         # Number of strains
+        self.results['phenotypes'] = uTraitsR                 # Traits used
+        self.results['strains'] = uStrainsR                   # Strains used in the analysis
+        self.results['requestform'] = requestform             # Store the user specified parameters for the output page
+
+        # Calculate soft threshold if the user specified the SoftThreshold variable
+        if requestform.get('SoftThresholds') is not None:
+          powers = [int(threshold.strip()) for threshold in requestform['SoftThresholds'].rstrip().split(",")]
+          rpow = r_unlist(r_c(powers))
+          print "SoftThresholds: {} == {}".format(powers, rpow)
+          self.sft    = self.r_pickSoftThreshold(rM, powerVector = rpow, verbose = 5)
+
+          print "PowerEstimate: {}".format(self.sft[0])
+          self.results['PowerEstimate'] = self.sft[0]
+          if self.sft[0][0] is ri.NA_Integer:
+            print "No power is suitable for the analysis, just use 1"
+            self.results['Power'] = 1                         # No power could be estimated
+          else:
+            self.results['Power'] = self.sft[0][0]            # Use the estimated power
+        else:
+          # The user clicked a button, so no soft threshold selection
+          self.results['Power'] = requestform.get('Power')    # Use the power value the user gives
+
+        # Create the block wise modules using WGCNA
+        network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3)
+
+        # Save the network for the GUI
+        self.results['network'] = network
+
+        # How many modules and how many gene per module ?
+        print "WGCNA found {} modules".format(r_table(network[1]))
+        self.results['nmod'] = r_length(r_table(network[1]))[0]
+
+        # The iconic WCGNA plot of the modules in the hanging tree
+        self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
+        self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl']
+        r_png(self.results['imgloc'], width=1000, height=600)
+        mergedColors = self.r_labels2colors(network[1])
+        self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05)
+        r_dev_off()
+        sys.stdout.flush()
+
+    def render_image(self, results):
+        print("pre-loading imgage results:", self.results['imgloc'])
+        imgfile = open(self.results['imgloc'], 'rb')
+        imgdata = imgfile.read()
+        imgB64 = imgdata.encode("base64")
+        bytesarray = array.array('B', imgB64)
+        self.results['imgdata'] = bytesarray
+
+    def process_results(self, results):
+        print("Processing WGCNA output")
+        template_vars = {}
+        template_vars["input"] = self.input
+        template_vars["powers"] = self.sft[1:]                      # Results from the soft threshold analysis
+        template_vars["results"] = self.results
+        self.render_image(results)
+        sys.stdout.flush()
+        #r_sink(type = "message")                                   # This restores R output to the stdout/stderr
+        #r_sink()                                                   # We should end the Rpy session more or less
+        return(dict(template_vars))
+
author	zsloan	2015-10-09 22:08:17 +0000
committer	zsloan	2015-10-09 22:08:17 +0000
commit	01fb1212408818512a729f6bd00a203defa5290c (patch)
tree	e608fd0f61d07abc61a33194d6620b0fbc12ba1f
parent	fb40bfa27aef7dd088bf2467d0845e8935444c4b (diff)
parent	a9910ed7c5b1b0959ae2a39872ff90a3a76d4e9e (diff)
download	genenetwork2-01fb1212408818512a729f6bd00a203defa5290c.tar.gz