From dd3f7bb79d39252a987826a9825d00da782ba58a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 2 May 2013 22:58:18 +0000 Subject: Got quick search code running (but not displaying properly) Code that processes subset of snps works, in process of putting into a class --- wqflask/maintenance/quick_search_table.py | 4 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 79 +++++++------- wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 131 +++++++++++++++++++++--- wqflask/wqflask/search_results.py | 14 +-- wqflask/wqflask/templates/quick_search.html | 6 +- 5 files changed, 173 insertions(+), 61 deletions(-) (limited to 'wqflask') diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 4f2cd8a9..b07e7656 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -495,9 +495,9 @@ def main(): Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables. """ - #ProbeSetXRef.run() + ProbeSetXRef.run() #GenoXRef.run() - PublishXRef.run() + #PublishXRef.run() if __name__ == "__main__": main() \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 10221a2e..fc021a0b 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -28,6 +28,7 @@ from scipy import stats import pdb import gzip +import zlib import datetime import cPickle as pickle import simplejson as json @@ -55,15 +56,26 @@ def run_human(pheno_vector, keep = True - v keep = keep.reshape((len(keep),)) + identifier = str(uuid.uuid4()) + + lmm_vars = pickle.dumps(dict( + pheno_vector = pheno_vector, + covariate_matrix = covariate_matrix, + kinship_matrix = kinship_matrix + )) + Redis.hset(identifier, "lmm_vars", lmm_vars) + Redis.expire(identifier, 60*60) + if v.sum(): pheno_vector = pheno_vector[keep] #print("pheno_vector shape is now: ", pf(pheno_vector.shape)) covariate_matrix = covariate_matrix[keep,:] - #print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) + print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) #print("len(keep) is: ", pf(keep.shape)) kinship_matrix = kinship_matrix[keep,:][:,keep] n = kinship_matrix.shape[0] + print("n is:", n) lmm_ob = LMM(pheno_vector, kinship_matrix, covariate_matrix) @@ -96,19 +108,15 @@ def run_human(pheno_vector, results = chunks.divide_into_chunks(inputs, 64) result_store = [] - identifier = str(uuid.uuid4()) - - lmm_vars = pickle.dumps(dict( - pheno_vector = pheno_vector, - covariate_matrix = covariate_matrix, - kinship_matrix = kinship_matrix - )) - Redis.hset(identifier, "lmm_vars", pickle.dumps(lmm_vars)) - key = "plink_inputs" + + # Todo: Delete below line when done testing + Redis.delete(key) + timestamp = datetime.datetime.utcnow().isoformat() + print("Starting adding loop") for part, result in enumerate(results): #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) holder = pickle.dumps(dict( @@ -117,33 +125,34 @@ def run_human(pheno_vector, timestamp = timestamp, result = result ), pickle.HIGHEST_PROTOCOL) + print("Adding:", part) - Redis.rpush(key, holder) - + Redis.rpush(key, zlib.compress(holder)) + print("End adding loop") print("***** Added to {} queue *****".format(key)) for snp, this_id in plink_input: - with Bench("part before association"): - if count > 2000: - break - count += 1 - - percent_complete = (float(count) / total_snps) * 100 - #print("percent_complete: ", percent_complete) - loading_progress.store("percent_complete", percent_complete) - - with Bench("actual association"): - ps, ts = human_association(snp, - n, - keep, - lmm_ob, - pheno_vector, - covariate_matrix, - kinship_matrix, - refit) - - with Bench("after association"): - p_values.append(ps) - t_stats.append(ts) + #with Bench("part before association"): + if count > 2000: + break + count += 1 + + percent_complete = (float(count) / total_snps) * 100 + #print("percent_complete: ", percent_complete) + loading_progress.store("percent_complete", percent_complete) + + #with Bench("actual association"): + ps, ts = human_association(snp, + n, + keep, + lmm_ob, + pheno_vector, + covariate_matrix, + kinship_matrix, + refit) + + #with Bench("after association"): + p_values.append(ps) + t_stats.append(ts) return p_values, t_stats @@ -326,7 +335,7 @@ def GWAS(pheno_vector, covariate_matrix - n x q covariate matrix restricted_max_likelihood - use restricted maximum likelihood refit - refit the variance component for each SNP - + """ if kinship_eigen_vals == None: kinship_eigen_vals = [] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py index 1274fe50..e47c18e1 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py @@ -1,28 +1,127 @@ from __future__ import absolute_import, print_function, division +import sys +sys.path.append("../../..") + +print("sys.path: ", sys.path) + +import numpy as np + +import zlib import cPickle as pickle import redis Redis = redis.Redis() -from wqflask.my_pylmm.pyLMM import lmm - -lmm_vars_pickled = Redis.get("lmm_vars") - +import lmm -plink_pickled = Redis.lpop("plink_inputs") +class ProcessLmmChunk(object): + + def __init__(self): + self.get_snp_data() + self.get_lmm_vars() + + keep = self.trim_matrices() + + self.do_association(keep) + + print("p_values is: ", self.p_values) + + def get_snp_data(self): + plink_pickled = zlib.decompress(Redis.lpop("plink_inputs")) + plink_data = pickle.loads(plink_pickled) + + self.snps = np.array(plink_data['result']) + self.identifier = plink_data['identifier'] + + def get_lmm_vars(self): + lmm_vars_pickled = Redis.hget(self.identifier, "lmm_vars") + lmm_vars = pickle.loads(lmm_vars_pickled) + + self.pheno_vector = np.array(lmm_vars['pheno_vector']) + self.covariate_matrix = np.array(lmm_vars['covariate_matrix']) + self.kinship_matrix = np.array(lmm_vars['kinship_matrix']) + + def trim_matrices(self): + v = np.isnan(self.pheno_vector) + keep = True - v + keep = keep.reshape((len(keep),)) + + if v.sum(): + self.pheno_vector = self.pheno_vector[keep] + self.covariate_matrix = self.covariate_matrix[keep,:] + self.kinship_matrix = self.kinship_matrix[keep,:][:,keep] -plink_data = pickle.loads(plink_pickled) + return keep + + def do_association(self, keep): + n = self.kinship_matrix.shape[0] + lmm_ob = lmm.LMM(self.pheno_vector, + self.kinship_matrix, + self.covariate_matrix) + lmm_ob.fit() + + self.p_values = [] + + for snp in self.snps: + snp = snp[0] + p_value, t_stat = lmm.human_association(snp, + n, + keep, + lmm_ob, + self.pheno_vector, + self.covariate_matrix, + self.kinship_matrix, + False) + + self.p_values.append(p_value) + -identifier = plink_data['identifier'] -print("identifier: ", identifier) +#plink_pickled = zlib.decompress(Redis.lpop("plink_inputs")) +# +#plink_data = pickle.loads(plink_pickled) +#result = np.array(plink_data['result']) +#print("snp size is: ", result.shape) +#identifier = plink_data['identifier'] +# +#lmm_vars_pickled = Redis.hget(identifier, "lmm_vars") +#lmm_vars = pickle.loads(lmm_vars_pickled) +# +#pheno_vector = np.array(lmm_vars['pheno_vector']) +#covariate_matrix = np.array(lmm_vars['covariate_matrix']) +#kinship_matrix = np.array(lmm_vars['kinship_matrix']) +# +#v = np.isnan(pheno_vector) +#keep = True - v +#keep = keep.reshape((len(keep),)) +#print("keep is: ", keep) +# +#if v.sum(): +# pheno_vector = pheno_vector[keep] +# covariate_matrix = covariate_matrix[keep,:] +# kinship_matrix = kinship_matrix[keep,:][:,keep] +# +#n = kinship_matrix.shape[0] +#print("n is: ", n) +#lmm_ob = lmm.LMM(pheno_vector, +# kinship_matrix, +# covariate_matrix) +#lmm_ob.fit() +# +#p_values = [] +# +#for snp in result: +# snp = snp[0] +# p_value, t_stat = lmm.human_association(snp, +# n, +# keep, +# lmm_ob, +# pheno_vector, +# covariate_matrix, +# kinship_matrix, +# False) +# +# p_values.append(p_value) + -ps, ts = lmm.human_association(snp, - n, - keep, - lmm_ob, - pheno_vector, - covariate_matrix, - kinship_matrix, - refit) diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index dc872a8b..89f146b3 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -26,8 +26,7 @@ from MySQLdb import escape_string as escape from htmlgen import HTMLgen2 as HT from base import webqtlConfig -from utility.THCell import THCell -from utility.TDCell import TDCell +from utility.benchmark import Bench from base.data_set import create_dataset from base.trait import GeneralTrait from wqflask import parser @@ -124,13 +123,16 @@ class SearchResultPage(object): FROM QuickSearch WHERE MATCH (terms) AGAINST ('{}' IN BOOLEAN MODE) """.format(search_terms) - dbresults = g.db.execute(query, no_parameters=True).fetchall() + #print("query is: ", query) + + with Bench("Doing QuickSearch Query: "): + dbresults = g.db.execute(query, no_parameters=True).fetchall() #print("results: ", pf(results)) self.results = collections.defaultdict(list) type_dict = {'PublishXRef': 'phenotype', - 'ProbesetXRef': 'mrna_assay', + 'ProbeSetXRef': 'mrna_assay', 'GenoXRef': 'genotype'} for dbresult in dbresults: @@ -141,7 +143,7 @@ class SearchResultPage(object): self.results[type_dict[dbresult.table_name]].append(this_result) - print("results: ", pf(self.results['phenotype'])) + #print("results: ", pf(self.results['phenotype'])) #def quick_search(self): # self.search_terms = parser.parse(self.search_terms) @@ -209,6 +211,6 @@ class SearchResultPage(object): self.dataset, ) self.results.extend(the_search.run()) - print("in the search results are:", self.results) + #print("in the search results are:", self.results) self.header_fields = the_search.header_fields diff --git a/wqflask/wqflask/templates/quick_search.html b/wqflask/wqflask/templates/quick_search.html index d50b4937..769c40e6 100644 --- a/wqflask/wqflask/templates/quick_search.html +++ b/wqflask/wqflask/templates/quick_search.html @@ -42,9 +42,11 @@ - + {% endfor %} + #} -- cgit v1.2.3
Id Species Group