aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorZachary Sloan2014-01-30 00:53:21 +0000
committerZachary Sloan2014-01-30 00:53:21 +0000
commita0f5aae35941d880f58f178c80bfe0b346d7e8af (patch)
treed2a578ddd53ae90d646d13d4d33476e81a92c9b5 /wqflask
parentd8dae52c0b534a37697962326f0fcbac781b0ce1 (diff)
downloadgenenetwork2-a0f5aae35941d880f58f178c80bfe0b346d7e8af.tar.gz
Most of the work is done towards running lmm.py from the command line
and storing the results in redis
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/base/data_set.py2
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py130
-rwxr-xr-x[-rw-r--r--]wqflask/wqflask/my_pylmm/pyLMM/lmm.py79
3 files changed, 178 insertions, 33 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 9fbccf80..3deaa655 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -184,7 +184,7 @@ class HumanMarkers(Markers):
self.markers = []
for line in marker_data_fh:
splat = line.strip().split()
- print("splat:", splat)
+ #print("splat:", splat)
marker = {}
marker['chr'] = int(splat[0])
marker['name'] = splat[1]
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 5fa288df..1594d35d 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -7,16 +7,20 @@ from pprint import pformat as pf
import string
import sys
+import datetime
import os
import collections
+import uuid
import numpy as np
from scipy import linalg
-import simplejson as json
+import cPickle as pickle
-#from redis import Redis
+import simplejson as json
+from redis import Redis
+Redis = Redis()
from base.trait import GeneralTrait
from base import data_set
@@ -38,7 +42,7 @@ class MarkerRegression(object):
helper_functions.get_species_dataset_trait(self, start_vars)
- tempdata = temp_data.TempData(temp_uuid)
+ #tempdata = temp_data.TempData(temp_uuid)
self.samples = [] # Want only ones with values
self.vals = []
@@ -48,7 +52,8 @@ class MarkerRegression(object):
self.samples.append(str(sample))
self.vals.append(value)
- self.qtl_results = self.gen_data(tempdata)
+ #self.qtl_results = self.gen_data(tempdata)
+ self.qtl_results = self.gen_data(str(temp_uuid))
#Get chromosome lengths for drawing the manhattan plot
chromosome_mb_lengths = {}
@@ -61,15 +66,23 @@ class MarkerRegression(object):
)
- def gen_data(self, tempdata):
+ #def gen_data(self, tempdata):
+ def gen_data(self, temp_uuid):
"""Generates p-values for each marker"""
self.dataset.group.get_markers()
pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
+ #lmm_uuid = str(uuid.uuid4())
+
+ key = "pylmm:input:" + temp_uuid
+ print("key is:", pf(key))
+ #with Bench("Loading cache"):
+ # result = Redis.get(key)
+
if self.dataset.group.species == "human":
- p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
+ p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
else:
genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
@@ -77,25 +90,65 @@ class MarkerRegression(object):
trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
genotype_matrix = np.array(trimmed_genotype_data).T
-
+
#print("pheno_vector: ", pf(pheno_vector))
#print("genotype_matrix: ", pf(genotype_matrix))
#print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
+
+ #params = {"pheno_vector": pheno_vector,
+ # "genotype_matrix": genotype_matrix,
+ # "restricted_max_likelihood": True,
+ # "refit": False,
+ # "temp_data": tempdata}
+
+ params = dict(pheno_vector = pheno_vector.tolist(),
+ genotype_matrix = genotype_matrix.tolist(),
+ restricted_max_likelihood = True,
+ refit = False,
+ temp_uuid = temp_uuid,
+
+ # meta data
+ timestamp = datetime.datetime.now().isoformat(),
+ )
+
+ json_params = json.dumps(params)
+ print("json_params:", json_params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
+ print("before printing command")
+
+ command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key,
+ "non-human")
+ print("command is:", command)
+ print("after printing command")
+
+ os.system(command)
+
+ #t_stats, p_values = lmm.run(key)
+ #lmm.run(key)
+
+ json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
+ results = json.load(json_results)
+ t_stats = results['t_stats']
+ p_values = results['p_values']
- t_stats, p_values = lmm.run(
- pheno_vector,
- genotype_matrix,
- restricted_max_likelihood=True,
- refit=False,
- temp_data=tempdata
- )
+ print("p_values:", p_values)
+
+ #t_stats, p_values = lmm.run(
+ # pheno_vector,
+ # genotype_matrix,
+ # restricted_max_likelihood=True,
+ # refit=False,
+ # temp_data=tempdata
+ #)
#print("p_values:", p_values)
-
+
self.dataset.group.markers.add_pvalues(p_values)
return self.dataset.group.markers.markers
- def gen_human_results(self, pheno_vector, tempdata):
+ #def gen_human_results(self, pheno_vector, tempdata):
+ def gen_human_results(self, pheno_vector, key, temp_uuid):
file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
plink_input = input.plink(file_base, type='b')
@@ -106,16 +159,45 @@ class MarkerRegression(object):
kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
- p_values, t_stats = lmm.run_human(
- pheno_vector,
- covariate_matrix,
- input_file_name,
- kinship_matrix,
- loading_progress=tempdata
- )
+ print("Before creating params")
+
+ params = dict(pheno_vector = pheno_vector.tolist(),
+ covariate_matrix = covariate_matrix.tolist(),
+ input_file_name = input_file_name,
+ kinship_matrix = kinship_matrix.tolist(),
+ refit = False,
+ temp_uuid = temp_uuid,
+
+ # meta data
+ timestamp = datetime.datetime.isoformat(),
+ )
+
+ print("After creating params")
+
+ json_params = json.dumps(params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
- return p_values, t_stats
+ print("Before creating the command")
+ command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key,
+ "non-human")
+
+ print("command is:", command)
+
+ os.system(command)
+
+ p_values, t_stats = lmm.run_human(key)
+
+ #p_values, t_stats = lmm.run_human(
+ # pheno_vector,
+ # covariate_matrix,
+ # input_file_name,
+ # kinship_matrix,
+ # loading_progress=tempdata
+ # )
+
+ return p_values, t_stats
def identify_empty_samples(self):
no_val_samples = []
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 6d12991f..2560aa9e 100644..100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import, print_function, division
import sys
import time
+import argparse
import uuid
import numpy as np
@@ -27,6 +28,8 @@ from scipy import optimize
from scipy import stats
import pdb
+import simplejson as json
+
import gzip
import zlib
import datetime
@@ -40,17 +43,24 @@ from utility import temp_data
from wqflask.my_pylmm.pyLMM import chunks
-import redis
-Redis = redis.Redis()
+from redis import Redis
+Redis = Redis()
#np.seterr('raise')
+#def run_human(pheno_vector,
+# covariate_matrix,
+# plink_input_file,
+# kinship_matrix,
+# refit=False,
+# loading_progress=None):
+
def run_human(pheno_vector,
covariate_matrix,
plink_input_file,
kinship_matrix,
refit=False,
- loading_progress=None):
+ tempdata=None):
v = np.isnan(pheno_vector)
keep = True - v
@@ -142,7 +152,7 @@ def run_human(pheno_vector,
percent_complete = (float(count) / total_snps) * 100
#print("percent_complete: ", percent_complete)
- loading_progress.store("percent_complete", percent_complete)
+ tempdata.store("percent_complete", percent_complete)
#with Bench("actual association"):
ps, ts = human_association(snp,
@@ -218,11 +228,17 @@ def human_association(snp,
return ps, ts
-def run(pheno_vector,
+#def run(pheno_vector,
+# genotype_matrix,
+# restricted_max_likelihood=True,
+# refit=False,
+# temp_data=None):
+
+def run_other(pheno_vector,
genotype_matrix,
restricted_max_likelihood=True,
refit=False,
- temp_data=None):
+ tempdata=None):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
@@ -232,8 +248,10 @@ def run(pheno_vector,
"""
+
+ print("In run_other")
with Bench("Calculate Kinship"):
- kinship_matrix = calculate_kinship(genotype_matrix, temp_data)
+ kinship_matrix = calculate_kinship(genotype_matrix, tempdata)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -252,7 +270,7 @@ def run(pheno_vector,
kinship_matrix,
restricted_max_likelihood=True,
refit=False,
- temp_data=temp_data)
+ temp_data=tempdata)
Bench().report()
return t_stats, p_values
@@ -677,3 +695,48 @@ class LMM:
pl.xlabel("Heritability")
pl.ylabel("Probability of data")
pl.title(title)
+
+ def main():
+ parser = argparse.ArgumentParser(description='Run pyLMM')
+ parser.add_argument('-k', '--key')
+
+ opts = parser.parse_args()
+
+ key = opts.key
+
+ json_params = Redis.get(key)
+
+ params = json.loads(json_params)
+ print("params:", params)
+
+ is_human = params['human']
+
+ tempdata = temp_data.TempData(params['temp_uuid'])
+ if is_human:
+ ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
+ covariate_matrix = np.array(params['covariate_matrix']),
+ plink_input_file = params['input_file_name'],
+ kinship_matrix = np.array(params['kinship_matrix']),
+ refit = params['refit'],
+ tempdata = tempdata)
+ else:
+ ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']),
+ genotype_matrix = np.array(params['genotype_matrix']),
+ restricted_max_likelihood = params['restricted_max_likelihood'],
+ refit = params['refit'],
+ tempdata = tempdata)
+
+ results_key = "pylmm:results:" + params['temp_uuid']
+
+ json_results = json.dumps(dict(p_values = ps,
+ t_stats = ts))
+
+ #Pushing json_results into a list where it is the only item because blpop needs a list
+ Redis.rpush(results_key, json_results)
+ Redis.expire(results_key, 60*60)
+
+if __name__ == '__main__':
+ main()
+
+
+