From a0f5aae35941d880f58f178c80bfe0b346d7e8af Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 30 Jan 2014 00:53:21 +0000
Subject: Most of the work is done towards running lmm.py from the command line
 and storing the results in redis

---
 misc/notes.txt                                     |   4 +-
 wqflask/base/data_set.py                           |   2 +-
 .../wqflask/marker_regression/marker_regression.py | 130 +++++++++++++++++----
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py              |  79 +++++++++++--
 4 files changed, 181 insertions(+), 34 deletions(-)
 mode change 100644 => 100755 wqflask/wqflask/my_pylmm/pyLMM/lmm.py

diff --git a/misc/notes.txt b/misc/notes.txt
index fa2152d9..f8ce2759 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -273,7 +273,9 @@ grep -ir (search string) (directory)
 
 ===========================================
 
-Use argparse to deal with command line arguments (instead of argv)
+Command line arguments:
+
+Use argparse to deal with command line arguments (instead of argv or optparse)
 
 ===========================================
 
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 9fbccf80..3deaa655 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -184,7 +184,7 @@ class HumanMarkers(Markers):
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
-            print("splat:", splat)
+            #print("splat:", splat)
             marker = {}
             marker['chr'] = int(splat[0])
             marker['name'] = splat[1]
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 5fa288df..1594d35d 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -7,16 +7,20 @@ from pprint import pformat as pf
 
 import string
 import sys
+import datetime
 import os
 import collections
+import uuid
 
 import numpy as np
 from scipy import linalg
 
-import simplejson as json
+import cPickle as pickle
 
-#from redis import Redis
+import simplejson as json
 
+from redis import Redis
+Redis = Redis()
 
 from base.trait import GeneralTrait
 from base import data_set
@@ -38,7 +42,7 @@ class MarkerRegression(object):
 
         helper_functions.get_species_dataset_trait(self, start_vars)
 
-        tempdata = temp_data.TempData(temp_uuid)
+        #tempdata = temp_data.TempData(temp_uuid)
         
         self.samples = [] # Want only ones with values
         self.vals = []
@@ -48,7 +52,8 @@ class MarkerRegression(object):
             self.samples.append(str(sample))
             self.vals.append(value)
  
-        self.qtl_results = self.gen_data(tempdata)
+        #self.qtl_results = self.gen_data(tempdata)
+        self.qtl_results = self.gen_data(str(temp_uuid))
 
         #Get chromosome lengths for drawing the manhattan plot
         chromosome_mb_lengths = {}
@@ -61,15 +66,23 @@ class MarkerRegression(object):
         )
 
 
-    def gen_data(self, tempdata):
+    #def gen_data(self, tempdata):
+    def gen_data(self, temp_uuid):
         """Generates p-values for each marker"""
 
         self.dataset.group.get_markers()
 
         pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
 
+        #lmm_uuid = str(uuid.uuid4())
+
+        key = "pylmm:input:" + temp_uuid
+        print("key is:", pf(key))
+        #with Bench("Loading cache"):
+        #    result = Redis.get(key)
+
         if self.dataset.group.species == "human":
-            p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
+            p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
         else:
             genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
             
@@ -77,25 +90,65 @@ class MarkerRegression(object):
             trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
             
             genotype_matrix = np.array(trimmed_genotype_data).T
-            
+
             #print("pheno_vector: ", pf(pheno_vector))
             #print("genotype_matrix: ", pf(genotype_matrix))
             #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
+
+            #params = {"pheno_vector": pheno_vector,
+            #            "genotype_matrix": genotype_matrix,
+            #            "restricted_max_likelihood": True,
+            #            "refit": False,
+            #            "temp_data": tempdata}
+            
+            params = dict(pheno_vector = pheno_vector.tolist(),
+                        genotype_matrix = genotype_matrix.tolist(),
+                        restricted_max_likelihood = True,
+                        refit = False,
+                        temp_uuid = temp_uuid,
+                        
+                        # meta data
+                        timestamp = datetime.datetime.now().isoformat(),
+                        )
+            
+            json_params = json.dumps(params)
+            print("json_params:", json_params)
+            Redis.set(key, json_params)
+            Redis.expire(key, 60*60)
+            print("before printing command")
+
+            command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key,
+                                                                                              "non-human")
+            print("command is:", command)
+            print("after printing command")
+
+            os.system(command)
+
+            #t_stats, p_values = lmm.run(key)
+            #lmm.run(key)
+            
+            json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
+            results = json.load(json_results)
+            t_stats = results['t_stats']
+            p_values = results['p_values']
             
-            t_stats, p_values = lmm.run(
-                pheno_vector,
-                genotype_matrix,
-                restricted_max_likelihood=True,
-                refit=False,
-                temp_data=tempdata
-            )
+            print("p_values:", p_values)
+            
+            #t_stats, p_values = lmm.run(
+            #    pheno_vector,
+            #    genotype_matrix,
+            #    restricted_max_likelihood=True,
+            #    refit=False,
+            #    temp_data=tempdata
+            #)
             #print("p_values:", p_values)
-        
+
         self.dataset.group.markers.add_pvalues(p_values)
         return self.dataset.group.markers.markers
 
 
-    def gen_human_results(self, pheno_vector, tempdata):
+    #def gen_human_results(self, pheno_vector, tempdata):
+    def gen_human_results(self, pheno_vector, key, temp_uuid):
         file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
 
         plink_input = input.plink(file_base, type='b')
@@ -106,16 +159,45 @@ class MarkerRegression(object):
         kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
         kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
 
-        p_values, t_stats = lmm.run_human(
-                pheno_vector,
-                covariate_matrix,
-                input_file_name,
-                kinship_matrix,
-                loading_progress=tempdata
-            )
+        print("Before creating params")
+
+        params = dict(pheno_vector = pheno_vector.tolist(),
+                    covariate_matrix = covariate_matrix.tolist(),
+                    input_file_name = input_file_name,
+                    kinship_matrix = kinship_matrix.tolist(),
+                    refit = False,
+                    temp_uuid = temp_uuid,
+                        
+                    # meta data
+                    timestamp = datetime.datetime.isoformat(),
+                    )
+        
+        print("After creating params")
+        
+        json_params = json.dumps(params)
+        Redis.set(key, json_params)
+        Redis.expire(key, 60*60)
 
-        return p_values, t_stats
+        print("Before creating the command")
 
+        command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key,
+                                                                                          "non-human")
+        
+        print("command is:", command)
+        
+        os.system(command)
+
+        p_values, t_stats = lmm.run_human(key)
+
+        #p_values, t_stats = lmm.run_human(
+        #        pheno_vector,
+        #        covariate_matrix,
+        #        input_file_name,
+        #        kinship_matrix,
+        #        loading_progress=tempdata
+        #    )
+
+        return p_values, t_stats
 
     def identify_empty_samples(self):
         no_val_samples = []
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
old mode 100644
new mode 100755
index 6d12991f..2560aa9e
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import, print_function, division
 
 import sys
 import time
+import argparse
 import uuid
 
 import numpy as np
@@ -27,6 +28,8 @@ from scipy import optimize
 from scipy import stats
 import pdb
 
+import simplejson as json
+
 import gzip
 import zlib
 import datetime
@@ -40,17 +43,24 @@ from utility import temp_data
 
 from wqflask.my_pylmm.pyLMM import chunks
 
-import redis
-Redis = redis.Redis()
+from redis import Redis
+Redis = Redis()
 
 #np.seterr('raise')
 
+#def run_human(pheno_vector,
+#            covariate_matrix,
+#            plink_input_file,
+#            kinship_matrix,
+#            refit=False,
+#            loading_progress=None):
+
 def run_human(pheno_vector,
             covariate_matrix,
             plink_input_file,
             kinship_matrix,
             refit=False,
-            loading_progress=None):
+            tempdata=None):
 
     v = np.isnan(pheno_vector)
     keep = True - v
@@ -142,7 +152,7 @@ def run_human(pheno_vector,
 
             percent_complete = (float(count) / total_snps) * 100
             #print("percent_complete: ", percent_complete)
-            loading_progress.store("percent_complete", percent_complete)
+            tempdata.store("percent_complete", percent_complete)
 
             #with Bench("actual association"):
             ps, ts = human_association(snp,
@@ -218,11 +228,17 @@ def human_association(snp,
     return ps, ts
 
 
-def run(pheno_vector,
+#def run(pheno_vector,
+#        genotype_matrix,
+#        restricted_max_likelihood=True,
+#        refit=False,
+#        temp_data=None):
+    
+def run_other(pheno_vector,
         genotype_matrix,
         restricted_max_likelihood=True,
         refit=False,
-        temp_data=None):
+        tempdata=None):
     """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
     
     restricted_max_likelihood -- whether to use restricted max likelihood; True or False
@@ -232,8 +248,10 @@ def run(pheno_vector,
     
     """
     
+    
+    print("In run_other")
     with Bench("Calculate Kinship"):
-        kinship_matrix = calculate_kinship(genotype_matrix, temp_data)
+        kinship_matrix = calculate_kinship(genotype_matrix, tempdata)
     
     print("kinship_matrix: ", pf(kinship_matrix))
     print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -252,7 +270,7 @@ def run(pheno_vector,
                                 kinship_matrix,
                                 restricted_max_likelihood=True,
                                 refit=False,
-                                temp_data=temp_data)
+                                temp_data=tempdata)
     Bench().report()
     return t_stats, p_values
 
@@ -677,3 +695,48 @@ class LMM:
        pl.xlabel("Heritability")
        pl.ylabel("Probability of data")
        pl.title(title)
+
+    def main():
+        parser = argparse.ArgumentParser(description='Run pyLMM')
+        parser.add_argument('-k', '--key')
+        
+        opts = parser.parse_args()
+        
+        key = opts.key
+        
+        json_params = Redis.get(key)
+        
+        params = json.loads(json_params)
+        print("params:", params)
+    
+        is_human = params['human']
+        
+        tempdata = temp_data.TempData(params['temp_uuid'])
+        if is_human:
+            ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
+                      covariate_matrix = np.array(params['covariate_matrix']),
+                      plink_input_file = params['input_file_name'],
+                      kinship_matrix = np.array(params['kinship_matrix']),
+                      refit = params['refit'],
+                      tempdata = tempdata)
+        else:
+            ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']),
+                      genotype_matrix = np.array(params['genotype_matrix']),
+                      restricted_max_likelihood = params['restricted_max_likelihood'],
+                      refit = params['refit'],
+                      tempdata = tempdata)
+            
+        results_key = "pylmm:results:" + params['temp_uuid']
+
+        json_results = json.dumps(dict(p_values = ps,
+                                       t_stats = ts))
+        
+        #Pushing json_results into a list where it is the only item because blpop needs a list
+        Redis.rpush(results_key, json_results)
+        Redis.expire(results_key, 60*60)
+
+if __name__ == '__main__':
+    main()
+
+
+
-- 
cgit 1.4.1