From a0f5aae35941d880f58f178c80bfe0b346d7e8af Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 30 Jan 2014 00:53:21 +0000 Subject: Most of the work is done towards running lmm.py from the command line and storing the results in redis --- wqflask/base/data_set.py | 2 +- .../wqflask/marker_regression/marker_regression.py | 130 +++++++++++++++++---- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 79 +++++++++++-- 3 files changed, 178 insertions(+), 33 deletions(-) mode change 100644 => 100755 wqflask/wqflask/my_pylmm/pyLMM/lmm.py (limited to 'wqflask') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 9fbccf80..3deaa655 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -184,7 +184,7 @@ class HumanMarkers(Markers): self.markers = [] for line in marker_data_fh: splat = line.strip().split() - print("splat:", splat) + #print("splat:", splat) marker = {} marker['chr'] = int(splat[0]) marker['name'] = splat[1] diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 5fa288df..1594d35d 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -7,16 +7,20 @@ from pprint import pformat as pf import string import sys +import datetime import os import collections +import uuid import numpy as np from scipy import linalg -import simplejson as json +import cPickle as pickle -#from redis import Redis +import simplejson as json +from redis import Redis +Redis = Redis() from base.trait import GeneralTrait from base import data_set @@ -38,7 +42,7 @@ class MarkerRegression(object): helper_functions.get_species_dataset_trait(self, start_vars) - tempdata = temp_data.TempData(temp_uuid) + #tempdata = temp_data.TempData(temp_uuid) self.samples = [] # Want only ones with values self.vals = [] @@ -48,7 +52,8 @@ class MarkerRegression(object): self.samples.append(str(sample)) self.vals.append(value) - self.qtl_results = self.gen_data(tempdata) + #self.qtl_results = self.gen_data(tempdata) + self.qtl_results = self.gen_data(str(temp_uuid)) #Get chromosome lengths for drawing the manhattan plot chromosome_mb_lengths = {} @@ -61,15 +66,23 @@ class MarkerRegression(object): ) - def gen_data(self, tempdata): + #def gen_data(self, tempdata): + def gen_data(self, temp_uuid): """Generates p-values for each marker""" self.dataset.group.get_markers() pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) + #lmm_uuid = str(uuid.uuid4()) + + key = "pylmm:input:" + temp_uuid + print("key is:", pf(key)) + #with Bench("Loading cache"): + # result = Redis.get(key) + if self.dataset.group.species == "human": - p_values, t_stats = self.gen_human_results(pheno_vector, tempdata) + p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid) else: genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] @@ -77,25 +90,65 @@ class MarkerRegression(object): trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) genotype_matrix = np.array(trimmed_genotype_data).T - + #print("pheno_vector: ", pf(pheno_vector)) #print("genotype_matrix: ", pf(genotype_matrix)) #print("genotype_matrix.shape: ", pf(genotype_matrix.shape)) + + #params = {"pheno_vector": pheno_vector, + # "genotype_matrix": genotype_matrix, + # "restricted_max_likelihood": True, + # "refit": False, + # "temp_data": tempdata} + + params = dict(pheno_vector = pheno_vector.tolist(), + genotype_matrix = genotype_matrix.tolist(), + restricted_max_likelihood = True, + refit = False, + temp_uuid = temp_uuid, + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + print("json_params:", json_params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + print("before printing command") + + command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key, + "non-human") + print("command is:", command) + print("after printing command") + + os.system(command) + + #t_stats, p_values = lmm.run(key) + #lmm.run(key) + + json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) + results = json.load(json_results) + t_stats = results['t_stats'] + p_values = results['p_values'] - t_stats, p_values = lmm.run( - pheno_vector, - genotype_matrix, - restricted_max_likelihood=True, - refit=False, - temp_data=tempdata - ) + print("p_values:", p_values) + + #t_stats, p_values = lmm.run( + # pheno_vector, + # genotype_matrix, + # restricted_max_likelihood=True, + # refit=False, + # temp_data=tempdata + #) #print("p_values:", p_values) - + self.dataset.group.markers.add_pvalues(p_values) return self.dataset.group.markers.markers - def gen_human_results(self, pheno_vector, tempdata): + #def gen_human_results(self, pheno_vector, tempdata): + def gen_human_results(self, pheno_vector, key, temp_uuid): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) plink_input = input.plink(file_base, type='b') @@ -106,16 +159,45 @@ class MarkerRegression(object): kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ") kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs))) - p_values, t_stats = lmm.run_human( - pheno_vector, - covariate_matrix, - input_file_name, - kinship_matrix, - loading_progress=tempdata - ) + print("Before creating params") + + params = dict(pheno_vector = pheno_vector.tolist(), + covariate_matrix = covariate_matrix.tolist(), + input_file_name = input_file_name, + kinship_matrix = kinship_matrix.tolist(), + refit = False, + temp_uuid = temp_uuid, + + # meta data + timestamp = datetime.datetime.isoformat(), + ) + + print("After creating params") + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) - return p_values, t_stats + print("Before creating the command") + command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key, + "non-human") + + print("command is:", command) + + os.system(command) + + p_values, t_stats = lmm.run_human(key) + + #p_values, t_stats = lmm.run_human( + # pheno_vector, + # covariate_matrix, + # input_file_name, + # kinship_matrix, + # loading_progress=tempdata + # ) + + return p_values, t_stats def identify_empty_samples(self): no_val_samples = [] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py old mode 100644 new mode 100755 index 6d12991f..2560aa9e --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,6 +19,7 @@ from __future__ import absolute_import, print_function, division import sys import time +import argparse import uuid import numpy as np @@ -27,6 +28,8 @@ from scipy import optimize from scipy import stats import pdb +import simplejson as json + import gzip import zlib import datetime @@ -40,17 +43,24 @@ from utility import temp_data from wqflask.my_pylmm.pyLMM import chunks -import redis -Redis = redis.Redis() +from redis import Redis +Redis = Redis() #np.seterr('raise') +#def run_human(pheno_vector, +# covariate_matrix, +# plink_input_file, +# kinship_matrix, +# refit=False, +# loading_progress=None): + def run_human(pheno_vector, covariate_matrix, plink_input_file, kinship_matrix, refit=False, - loading_progress=None): + tempdata=None): v = np.isnan(pheno_vector) keep = True - v @@ -142,7 +152,7 @@ def run_human(pheno_vector, percent_complete = (float(count) / total_snps) * 100 #print("percent_complete: ", percent_complete) - loading_progress.store("percent_complete", percent_complete) + tempdata.store("percent_complete", percent_complete) #with Bench("actual association"): ps, ts = human_association(snp, @@ -218,11 +228,17 @@ def human_association(snp, return ps, ts -def run(pheno_vector, +#def run(pheno_vector, +# genotype_matrix, +# restricted_max_likelihood=True, +# refit=False, +# temp_data=None): + +def run_other(pheno_vector, genotype_matrix, restricted_max_likelihood=True, refit=False, - temp_data=None): + tempdata=None): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False @@ -232,8 +248,10 @@ def run(pheno_vector, """ + + print("In run_other") with Bench("Calculate Kinship"): - kinship_matrix = calculate_kinship(genotype_matrix, temp_data) + kinship_matrix = calculate_kinship(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -252,7 +270,7 @@ def run(pheno_vector, kinship_matrix, restricted_max_likelihood=True, refit=False, - temp_data=temp_data) + temp_data=tempdata) Bench().report() return t_stats, p_values @@ -677,3 +695,48 @@ class LMM: pl.xlabel("Heritability") pl.ylabel("Probability of data") pl.title(title) + + def main(): + parser = argparse.ArgumentParser(description='Run pyLMM') + parser.add_argument('-k', '--key') + + opts = parser.parse_args() + + key = opts.key + + json_params = Redis.get(key) + + params = json.loads(json_params) + print("params:", params) + + is_human = params['human'] + + tempdata = temp_data.TempData(params['temp_uuid']) + if is_human: + ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), + covariate_matrix = np.array(params['covariate_matrix']), + plink_input_file = params['input_file_name'], + kinship_matrix = np.array(params['kinship_matrix']), + refit = params['refit'], + tempdata = tempdata) + else: + ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']), + genotype_matrix = np.array(params['genotype_matrix']), + restricted_max_likelihood = params['restricted_max_likelihood'], + refit = params['refit'], + tempdata = tempdata) + + results_key = "pylmm:results:" + params['temp_uuid'] + + json_results = json.dumps(dict(p_values = ps, + t_stats = ts)) + + #Pushing json_results into a list where it is the only item because blpop needs a list + Redis.rpush(results_key, json_results) + Redis.expire(results_key, 60*60) + +if __name__ == '__main__': + main() + + + -- cgit v1.2.3