From dd3f7bb79d39252a987826a9825d00da782ba58a Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 2 May 2013 22:58:18 +0000
Subject: Got quick search code running (but not displaying properly)

The code that processes a subset of SNPs works; it is in the process of
being moved into a class.
---
 wqflask/maintenance/quick_search_table.py       |   4 +-
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py           |  79 +++++++-------
 wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 131 +++++++++++++++++++++---
 wqflask/wqflask/search_results.py               |  14 +--
 wqflask/wqflask/templates/quick_search.html     |   6 +-
 5 files changed, 173 insertions(+), 61 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 4f2cd8a9..b07e7656 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -495,9 +495,9 @@ def main():
     Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables.
     
     """
-    #ProbeSetXRef.run()
+    ProbeSetXRef.run()
     #GenoXRef.run()
-    PublishXRef.run()
+    #PublishXRef.run()
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 10221a2e..fc021a0b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -28,6 +28,7 @@ from scipy import stats
 import pdb
 
 import gzip
+import zlib
 import datetime
 import cPickle as pickle
 import simplejson as json
@@ -55,15 +56,26 @@ def run_human(pheno_vector,
     keep = True - v
     keep = keep.reshape((len(keep),))
 
+    identifier = str(uuid.uuid4())
+    
+    lmm_vars = pickle.dumps(dict(
+        pheno_vector = pheno_vector,
+        covariate_matrix = covariate_matrix,
+        kinship_matrix = kinship_matrix
+    ))
+    Redis.hset(identifier, "lmm_vars", lmm_vars)
+    Redis.expire(identifier, 60*60)
+
     if v.sum():
         pheno_vector = pheno_vector[keep]
         #print("pheno_vector shape is now: ", pf(pheno_vector.shape))
         covariate_matrix = covariate_matrix[keep,:]
-        #print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
+        print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
         #print("len(keep) is: ", pf(keep.shape))
         kinship_matrix = kinship_matrix[keep,:][:,keep]
 
     n = kinship_matrix.shape[0]
+    print("n is:", n)
     lmm_ob = LMM(pheno_vector,
                 kinship_matrix,
                 covariate_matrix)
@@ -96,19 +108,15 @@ def run_human(pheno_vector,
             results = chunks.divide_into_chunks(inputs, 64)
 
         result_store = []
-        identifier = str(uuid.uuid4())
-        
-        lmm_vars = pickle.dumps(dict(
-            pheno_vector = pheno_vector,
-            covariate_matrix = covariate_matrix,
-            kinship_matrix = kinship_matrix
-        ))
-        Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars))
-
 
         key = "plink_inputs"
+        
+        # Todo: Delete below line when done testing
+        Redis.delete(key)
+        
         timestamp = datetime.datetime.utcnow().isoformat()
 
+        print("Starting adding loop")
         for part, result in enumerate(results):
             #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
             holder = pickle.dumps(dict(
@@ -117,33 +125,34 @@ def run_human(pheno_vector,
                 timestamp = timestamp,
                 result = result
             ), pickle.HIGHEST_PROTOCOL)
+            
             print("Adding:", part)
-            Redis.rpush(key, holder)
-
+            Redis.rpush(key, zlib.compress(holder))
+        print("End adding loop")
         print("***** Added to {} queue *****".format(key))
         for snp, this_id in plink_input:
-            with Bench("part before association"):
-                if count > 2000:
-                    break
-                count += 1
-
-                percent_complete = (float(count) / total_snps) * 100
-                #print("percent_complete: ", percent_complete)
-                loading_progress.store("percent_complete", percent_complete)
-        
-            with Bench("actual association"):
-                ps, ts = human_association(snp,
-                                           n,
-                                           keep,
-                                           lmm_ob,
-                                           pheno_vector,
-                                           covariate_matrix,
-                                           kinship_matrix,
-                                           refit)
-
-            with Bench("after association"):
-                p_values.append(ps)
-                t_stats.append(ts)
+            #with Bench("part before association"):
+            if count > 2000:
+                break
+            count += 1
+
+            percent_complete = (float(count) / total_snps) * 100
+            #print("percent_complete: ", percent_complete)
+            loading_progress.store("percent_complete", percent_complete)
+
+            #with Bench("actual association"):
+            ps, ts = human_association(snp,
+                                       n,
+                                       keep,
+                                       lmm_ob,
+                                       pheno_vector,
+                                       covariate_matrix,
+                                       kinship_matrix,
+                                       refit)
+
+            #with Bench("after association"):
+            p_values.append(ps)
+            t_stats.append(ts)
         
     return p_values, t_stats
 
@@ -326,7 +335,7 @@ def GWAS(pheno_vector,
     covariate_matrix - n x q covariate matrix
     restricted_max_likelihood - use restricted maximum likelihood
     refit - refit the variance component for each SNP
-      
+    
     """
     if kinship_eigen_vals == None:
         kinship_eigen_vals = []
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
index 1274fe50..e47c18e1 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
@@ -1,28 +1,127 @@
 from __future__ import absolute_import, print_function, division
 
+import sys
+sys.path.append("../../..")
+
+print("sys.path: ", sys.path)
+
+import numpy as np
+
+import zlib
 import cPickle as pickle
 import redis
 Redis = redis.Redis()
 
-from wqflask.my_pylmm.pyLMM import lmm
-
-lmm_vars_pickled = Redis.get("lmm_vars")
-
+import lmm
 
-plink_pickled = Redis.lpop("plink_inputs")
+class ProcessLmmChunk(object):
+    
+    def __init__(self):
+        self.get_snp_data()
+        self.get_lmm_vars()
+        
+        keep = self.trim_matrices()
+        
+        self.do_association(keep)
+        
+        print("p_values is: ", self.p_values)
+        
+    def get_snp_data(self):
+        plink_pickled = zlib.decompress(Redis.lpop("plink_inputs"))
+        plink_data = pickle.loads(plink_pickled)
+        
+        self.snps = np.array(plink_data['result'])
+        self.identifier = plink_data['identifier']
+        
+    def get_lmm_vars(self):
+        lmm_vars_pickled = Redis.hget(self.identifier, "lmm_vars")
+        lmm_vars = pickle.loads(lmm_vars_pickled)
+        
+        self.pheno_vector = np.array(lmm_vars['pheno_vector'])
+        self.covariate_matrix = np.array(lmm_vars['covariate_matrix'])
+        self.kinship_matrix = np.array(lmm_vars['kinship_matrix'])
+        
+    def trim_matrices(self):
+        v = np.isnan(self.pheno_vector)
+        keep = True - v
+        keep = keep.reshape((len(keep),))
+        
+        if v.sum():
+            self.pheno_vector = self.pheno_vector[keep]
+            self.covariate_matrix = self.covariate_matrix[keep,:]
+            self.kinship_matrix = self.kinship_matrix[keep,:][:,keep]
 
-plink_data = pickle.loads(plink_pickled)
+        return keep
+    
+    def do_association(self, keep):
+        n = self.kinship_matrix.shape[0]
+        lmm_ob = lmm.LMM(self.pheno_vector,
+                    self.kinship_matrix,
+                    self.covariate_matrix)
+        lmm_ob.fit()
+    
+        self.p_values = []
+        
+        for snp in self.snps:
+            snp = snp[0]
+            p_value, t_stat = lmm.human_association(snp,
+                                        n,
+                                        keep,
+                                        lmm_ob,
+                                        self.pheno_vector,
+                                        self.covariate_matrix,
+                                        self.kinship_matrix,
+                                        False)
+        
+            self.p_values.append(p_value)
+            
 
-identifier = plink_data['identifier']
-print("identifier: ", identifier)
+#plink_pickled = zlib.decompress(Redis.lpop("plink_inputs"))
+#
+#plink_data = pickle.loads(plink_pickled)
+#result = np.array(plink_data['result'])
+#print("snp size is: ", result.shape)
+#identifier = plink_data['identifier']
+#
+#lmm_vars_pickled = Redis.hget(identifier, "lmm_vars")
+#lmm_vars = pickle.loads(lmm_vars_pickled)
+#
+#pheno_vector = np.array(lmm_vars['pheno_vector'])
+#covariate_matrix = np.array(lmm_vars['covariate_matrix'])
+#kinship_matrix = np.array(lmm_vars['kinship_matrix'])
+#
+#v = np.isnan(pheno_vector)
+#keep = True - v
+#keep = keep.reshape((len(keep),))
+#print("keep is: ", keep)
+#
+#if v.sum():
+#    pheno_vector = pheno_vector[keep]
+#    covariate_matrix = covariate_matrix[keep,:]
+#    kinship_matrix = kinship_matrix[keep,:][:,keep]
+#
+#n = kinship_matrix.shape[0]
+#print("n is: ", n)
+#lmm_ob = lmm.LMM(pheno_vector,
+#            kinship_matrix,
+#            covariate_matrix)
+#lmm_ob.fit()
+#
+#p_values = []
+#
+#for snp in result:
+#    snp = snp[0]
+#    p_value, t_stat = lmm.human_association(snp,
+#                                n,
+#                                keep,
+#                                lmm_ob,
+#                                pheno_vector,
+#                                covariate_matrix,
+#                                kinship_matrix,
+#                                False)
+#
+#    p_values.append(p_value)
+    
 
 
 
-ps, ts = lmm.human_association(snp,
-                                n,
-                                keep,
-                                lmm_ob,
-                                pheno_vector,
-                                covariate_matrix,
-                                kinship_matrix,
-                                refit)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index dc872a8b..89f146b3 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -26,8 +26,7 @@ from MySQLdb import escape_string as escape
 from htmlgen import HTMLgen2 as HT
 
 from base import webqtlConfig
-from utility.THCell import THCell
-from utility.TDCell import TDCell
+from utility.benchmark import Bench
 from base.data_set import create_dataset
 from base.trait import GeneralTrait
 from wqflask import parser
@@ -124,13 +123,16 @@ class SearchResultPage(object):
                     FROM QuickSearch
                     WHERE MATCH (terms)
                           AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms)
-        dbresults = g.db.execute(query, no_parameters=True).fetchall()
+        #print("query is: ", query)
+        
+        with Bench("Doing QuickSearch Query: "):
+            dbresults = g.db.execute(query, no_parameters=True).fetchall()
         #print("results: ", pf(results))
         
         self.results = collections.defaultdict(list)
         
         type_dict = {'PublishXRef': 'phenotype',
-                   'ProbesetXRef': 'mrna_assay',
+                   'ProbeSetXRef': 'mrna_assay',
                    'GenoXRef': 'genotype'}
 
         for dbresult in dbresults:
@@ -141,7 +143,7 @@ class SearchResultPage(object):
             
             self.results[type_dict[dbresult.table_name]].append(this_result)
             
-        print("results: ", pf(self.results['phenotype']))
+        #print("results: ", pf(self.results['phenotype']))
 
     #def quick_search(self):
     #    self.search_terms = parser.parse(self.search_terms)
@@ -209,6 +211,6 @@ class SearchResultPage(object):
                                     self.dataset,
                                     )
             self.results.extend(the_search.run())
-            print("in the search results are:", self.results)
+            #print("in the search results are:", self.results)
 
         self.header_fields = the_search.header_fields
diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html
index d50b4937..769c40e6 100644
--- a/wqflask/wqflask/templates/quick_search.html
+++ b/wqflask/wqflask/templates/quick_search.html
@@ -42,9 +42,11 @@
                     <table class="table table-hover">
                         <thead>
                             <tr>
-                       <!--     {% for key, _value in results.phenotype[0].result_fields.items() %}
+                        {#
+                            {% for key, _value in results.phenotype[0].result_fields.items() %}
                                 <th>{{key}}</th>
-                            {% endfor %}-->
+                            {% endfor %}
+                        #}
                             <th>Id</th>
                             <th>Species</th>
                             <th>Group</th>
-- 
cgit v1.2.3