about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Zachary Sloan, 2013-05-02 22:58:18 +0000
committer: Zachary Sloan, 2013-05-02 22:58:18 +0000
commit: dd3f7bb79d39252a987826a9825d00da782ba58a (patch)
tree: 3ee41e0df9e2efd6857d78226e8f856cc19c7116
parent: bfb2195d4aa6af0814a50998b57c89c3d497b4db (diff)
download: genenetwork2-dd3f7bb79d39252a987826a9825d00da782ba58a.tar.gz
Got quick search code running (but not displaying properly)
Code that processes subset of snps works, in process of putting into a class
-rw-r--r-- wqflask/maintenance/quick_search_table.py | 4
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 79
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 131
-rw-r--r-- wqflask/wqflask/search_results.py | 14
-rw-r--r-- wqflask/wqflask/templates/quick_search.html | 6
5 files changed, 173 insertions, 61 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 4f2cd8a9..b07e7656 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -495,9 +495,9 @@ def main():
Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables.
"""
- #ProbeSetXRef.run()
+ ProbeSetXRef.run()
#GenoXRef.run()
- PublishXRef.run()
+ #PublishXRef.run()
if __name__ == "__main__":
main()
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 10221a2e..fc021a0b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -28,6 +28,7 @@ from scipy import stats
import pdb
import gzip
+import zlib
import datetime
import cPickle as pickle
import simplejson as json
@@ -55,15 +56,26 @@ def run_human(pheno_vector,
keep = True - v
keep = keep.reshape((len(keep),))
+ identifier = str(uuid.uuid4())
+
+ lmm_vars = pickle.dumps(dict(
+ pheno_vector = pheno_vector,
+ covariate_matrix = covariate_matrix,
+ kinship_matrix = kinship_matrix
+ ))
+ Redis.hset(identifier, "lmm_vars", lmm_vars)
+ Redis.expire(identifier, 60*60)
+
if v.sum():
pheno_vector = pheno_vector[keep]
#print("pheno_vector shape is now: ", pf(pheno_vector.shape))
covariate_matrix = covariate_matrix[keep,:]
- #print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
+ print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
#print("len(keep) is: ", pf(keep.shape))
kinship_matrix = kinship_matrix[keep,:][:,keep]
n = kinship_matrix.shape[0]
+ print("n is:", n)
lmm_ob = LMM(pheno_vector,
kinship_matrix,
covariate_matrix)
@@ -96,19 +108,15 @@ def run_human(pheno_vector,
results = chunks.divide_into_chunks(inputs, 64)
result_store = []
- identifier = str(uuid.uuid4())
-
- lmm_vars = pickle.dumps(dict(
- pheno_vector = pheno_vector,
- covariate_matrix = covariate_matrix,
- kinship_matrix = kinship_matrix
- ))
- Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars))
-
key = "plink_inputs"
+
+ # Todo: Delete below line when done testing
+ Redis.delete(key)
+
timestamp = datetime.datetime.utcnow().isoformat()
+ print("Starting adding loop")
for part, result in enumerate(results):
#data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
holder = pickle.dumps(dict(
@@ -117,33 +125,34 @@ def run_human(pheno_vector,
timestamp = timestamp,
result = result
), pickle.HIGHEST_PROTOCOL)
+
print("Adding:", part)
- Redis.rpush(key, holder)
-
+ Redis.rpush(key, zlib.compress(holder))
+ print("End adding loop")
print("***** Added to {} queue *****".format(key))
for snp, this_id in plink_input:
- with Bench("part before association"):
- if count > 2000:
- break
- count += 1
-
- percent_complete = (float(count) / total_snps) * 100
- #print("percent_complete: ", percent_complete)
- loading_progress.store("percent_complete", percent_complete)
-
- with Bench("actual association"):
- ps, ts = human_association(snp,
- n,
- keep,
- lmm_ob,
- pheno_vector,
- covariate_matrix,
- kinship_matrix,
- refit)
-
- with Bench("after association"):
- p_values.append(ps)
- t_stats.append(ts)
+ #with Bench("part before association"):
+ if count > 2000:
+ break
+ count += 1
+
+ percent_complete = (float(count) / total_snps) * 100
+ #print("percent_complete: ", percent_complete)
+ loading_progress.store("percent_complete", percent_complete)
+
+ #with Bench("actual association"):
+ ps, ts = human_association(snp,
+ n,
+ keep,
+ lmm_ob,
+ pheno_vector,
+ covariate_matrix,
+ kinship_matrix,
+ refit)
+
+ #with Bench("after association"):
+ p_values.append(ps)
+ t_stats.append(ts)
return p_values, t_stats
@@ -326,7 +335,7 @@ def GWAS(pheno_vector,
covariate_matrix - n x q covariate matrix
restricted_max_likelihood - use restricted maximum likelihood
refit - refit the variance component for each SNP
-
+
"""
if kinship_eigen_vals == None:
kinship_eigen_vals = []
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
index 1274fe50..e47c18e1 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
@@ -1,28 +1,127 @@
from __future__ import absolute_import, print_function, division
+import sys
+sys.path.append("../../..")
+
+print("sys.path: ", sys.path)
+
+import numpy as np
+
+import zlib
import cPickle as pickle
import redis
Redis = redis.Redis()
-from wqflask.my_pylmm.pyLMM import lmm
-
-lmm_vars_pickled = Redis.get("lmm_vars")
-
+import lmm
-plink_pickled = Redis.lpop("plink_inputs")
+class ProcessLmmChunk(object):
+
+ def __init__(self):
+ self.get_snp_data()
+ self.get_lmm_vars()
+
+ keep = self.trim_matrices()
+
+ self.do_association(keep)
+
+ print("p_values is: ", self.p_values)
+
+ def get_snp_data(self):
+ plink_pickled = zlib.decompress(Redis.lpop("plink_inputs"))
+ plink_data = pickle.loads(plink_pickled)
+
+ self.snps = np.array(plink_data['result'])
+ self.identifier = plink_data['identifier']
+
+ def get_lmm_vars(self):
+ lmm_vars_pickled = Redis.hget(self.identifier, "lmm_vars")
+ lmm_vars = pickle.loads(lmm_vars_pickled)
+
+ self.pheno_vector = np.array(lmm_vars['pheno_vector'])
+ self.covariate_matrix = np.array(lmm_vars['covariate_matrix'])
+ self.kinship_matrix = np.array(lmm_vars['kinship_matrix'])
+
+ def trim_matrices(self):
+ v = np.isnan(self.pheno_vector)
+ keep = True - v
+ keep = keep.reshape((len(keep),))
+
+ if v.sum():
+ self.pheno_vector = self.pheno_vector[keep]
+ self.covariate_matrix = self.covariate_matrix[keep,:]
+ self.kinship_matrix = self.kinship_matrix[keep,:][:,keep]
-plink_data = pickle.loads(plink_pickled)
+ return keep
+
+ def do_association(self, keep):
+ n = self.kinship_matrix.shape[0]
+ lmm_ob = lmm.LMM(self.pheno_vector,
+ self.kinship_matrix,
+ self.covariate_matrix)
+ lmm_ob.fit()
+
+ self.p_values = []
+
+ for snp in self.snps:
+ snp = snp[0]
+ p_value, t_stat = lmm.human_association(snp,
+ n,
+ keep,
+ lmm_ob,
+ self.pheno_vector,
+ self.covariate_matrix,
+ self.kinship_matrix,
+ False)
+
+ self.p_values.append(p_value)
+
-identifier = plink_data['identifier']
-print("identifier: ", identifier)
+#plink_pickled = zlib.decompress(Redis.lpop("plink_inputs"))
+#
+#plink_data = pickle.loads(plink_pickled)
+#result = np.array(plink_data['result'])
+#print("snp size is: ", result.shape)
+#identifier = plink_data['identifier']
+#
+#lmm_vars_pickled = Redis.hget(identifier, "lmm_vars")
+#lmm_vars = pickle.loads(lmm_vars_pickled)
+#
+#pheno_vector = np.array(lmm_vars['pheno_vector'])
+#covariate_matrix = np.array(lmm_vars['covariate_matrix'])
+#kinship_matrix = np.array(lmm_vars['kinship_matrix'])
+#
+#v = np.isnan(pheno_vector)
+#keep = True - v
+#keep = keep.reshape((len(keep),))
+#print("keep is: ", keep)
+#
+#if v.sum():
+# pheno_vector = pheno_vector[keep]
+# covariate_matrix = covariate_matrix[keep,:]
+# kinship_matrix = kinship_matrix[keep,:][:,keep]
+#
+#n = kinship_matrix.shape[0]
+#print("n is: ", n)
+#lmm_ob = lmm.LMM(pheno_vector,
+# kinship_matrix,
+# covariate_matrix)
+#lmm_ob.fit()
+#
+#p_values = []
+#
+#for snp in result:
+# snp = snp[0]
+# p_value, t_stat = lmm.human_association(snp,
+# n,
+# keep,
+# lmm_ob,
+# pheno_vector,
+# covariate_matrix,
+# kinship_matrix,
+# False)
+#
+# p_values.append(p_value)
+
-ps, ts = lmm.human_association(snp,
- n,
- keep,
- lmm_ob,
- pheno_vector,
- covariate_matrix,
- kinship_matrix,
- refit)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index dc872a8b..89f146b3 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -26,8 +26,7 @@ from MySQLdb import escape_string as escape
from htmlgen import HTMLgen2 as HT
from base import webqtlConfig
-from utility.THCell import THCell
-from utility.TDCell import TDCell
+from utility.benchmark import Bench
from base.data_set import create_dataset
from base.trait import GeneralTrait
from wqflask import parser
@@ -124,13 +123,16 @@ class SearchResultPage(object):
FROM QuickSearch
WHERE MATCH (terms)
AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms)
- dbresults = g.db.execute(query, no_parameters=True).fetchall()
+ #print("query is: ", query)
+
+ with Bench("Doing QuickSearch Query: "):
+ dbresults = g.db.execute(query, no_parameters=True).fetchall()
#print("results: ", pf(results))
self.results = collections.defaultdict(list)
type_dict = {'PublishXRef': 'phenotype',
- 'ProbesetXRef': 'mrna_assay',
+ 'ProbeSetXRef': 'mrna_assay',
'GenoXRef': 'genotype'}
for dbresult in dbresults:
@@ -141,7 +143,7 @@ class SearchResultPage(object):
self.results[type_dict[dbresult.table_name]].append(this_result)
- print("results: ", pf(self.results['phenotype']))
+ #print("results: ", pf(self.results['phenotype']))
#def quick_search(self):
# self.search_terms = parser.parse(self.search_terms)
@@ -209,6 +211,6 @@ class SearchResultPage(object):
self.dataset,
)
self.results.extend(the_search.run())
- print("in the search results are:", self.results)
+ #print("in the search results are:", self.results)
self.header_fields = the_search.header_fields
diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html
index d50b4937..769c40e6 100644
--- a/wqflask/wqflask/templates/quick_search.html
+++ b/wqflask/wqflask/templates/quick_search.html
@@ -42,9 +42,11 @@
<table class="table table-hover">
<thead>
<tr>
- <!-- {% for key, _value in results.phenotype[0].result_fields.items() %}
+ {#
+ {% for key, _value in results.phenotype[0].result_fields.items() %}
<th>{{key}}</th>
- {% endfor %}-->
+ {% endfor %}
+ #}
<th>Id</th>
<th>Species</th>
<th>Group</th>