From b47cd2fca0c9a835b15288aa4959867eaeb00f5a Mon Sep 17 00:00:00 2001
From: DannyArends
Date: Sun, 20 Sep 2015 12:12:04 +0200
Subject: Transfering data from python to R for WGCNA analysis using Rpy2

---
 wqflask/wqflask/wgcna/wgcna_analysis.py | 50 ++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 10 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index 84cdc241..5316fcd4 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -1,9 +1,10 @@
 # WGCNA analysis for GN2
 # Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
-
+import sys
 from numpy import *
 import scipy as sp                            # SciPy
 import rpy2.robjects as ro                    # R Objects
+import rpy2.rinterface as ri
 
 from utility import helper_functions
 
@@ -16,6 +17,13 @@ r_options       = ro.r["options"]             # Map the options function
 r_read_csv      = ro.r["read.csv"]            # Map the read.csv function
 r_dim           = ro.r["dim"]                 # Map the dim function
 r_c             = ro.r["c"]                   # Map the c function
+r_cat           = ro.r["cat"]                 # Map the cat function
+r_paste         = ro.r["paste"]               # Map the paste function
+r_unlist        = ro.r["unlist"]              # Map the unlist function
+r_unique        = ro.r["unique"]              # Map the unique function
+r_length        = ro.r["length"]              # Map the length function
+r_matrix        = ro.r["matrix"]              # Map the matrix function
+r_list          = ro.r["list"]                # Map the list function
 r_seq           = ro.r["seq"]                 # Map the seq function
 r_table         = ro.r["table"]               # Map the table function
 r_names         = ro.r["names"]               # Map the names function
@@ -46,25 +54,47 @@ class WGCNA(object):
         self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
         print("Retrieved phenotype data from database", requestform['trait_list'])
         helper_functions.get_trait_db_obs(self, self.trait_db_list)
-        self.results = {}
-        for t in self.trait_list:
-            strains = []
-            for s in t[0].data:
-                strains.append(s)
-            self.results[t]  = strains
 
-        print("Retrieved phenotype data from database")
+        self.input = {}           # self.input contains the phenotype values we need to send to R
+        strains = []              # All the strains we have data for (contains duplicates)
+        traits  = []              # All the traits we have data for (should not contain duplicates)
+        for trait in self.trait_list:
+            traits.append(trait[0].name)
+            self.input[trait[0].name] = {}
+            for strain in trait[0].data:
+                strains.append(strain)
+                self.input[trait[0].name][strain]  = trait[0].data[strain].value
+
+        # Transfer the load data from python to R
+        uStrainsR = r_unique(ro.Vector(strains))    # Unique strains in R vector
+        uTraitsR = r_unique(ro.Vector(traits))      # Unique traits in R vector
+
+        r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
+        r_cat("The number of unique traits:", r_length(uTraitsR), "\n")
+
+        # rM is the datamatrix holding all the data in R /rows = strains columns = traits
+        rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR))
+        for t in uTraitsR:
+            trait = t[0]                  # R uses vectors every single element is a vector
+            for s in uStrainsR:
+                strain = s[0]             # R uses vectors every single element is a vector
+                rM.rx[strain, trait] = self.input[trait].get(strain)  # Update the matrix location
+                print(trait, strain, " in python: ", self.input[trait].get(strain), "in R:", rM.rx(strain,trait)[0])
+                sys.stdout.flush()
+
         # TODO:
-        # Load data into R
+        self.results = {}
         # Calculate a good soft threshold
         # Create block wise modules using WGCNA
         # How many modules and how many gene per module ?
         # Show the iconic WCGNA plot of the modules in the hanging tree
+        sys.stdout.flush()
 
     def process_results(self, results):
         print("Processing WGCNA output")
         template_vars = {}
-        template_vars["result"] = self.results
+        template_vars["input"] = self.input
+        template_vars["results"] = self.results
         #r_sink(type = "message")                                   # This restores R output to the stdout/stderr
         #r_sink()                                                   # We should end the Rpy session more or less
         return(dict(template_vars))
-- 
cgit v1.2.3