From 2df34255de526e1e016100a8772d2c8e10eb970f Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 24 Jan 2014 20:52:46 +0000 Subject: Added natural sort to sort the index column for the sample data in the trait page Hid the Quick Search on the index page until it is working well. Started writing a file to run the pyLMM code in the background to avoid memory issues/timing out --- misc/notes.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'misc') diff --git a/misc/notes.txt b/misc/notes.txt index 5a43ab8a..fa2152d9 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -273,6 +273,10 @@ grep -ir (search string) (directory) =========================================== +Use argparse to deal with command line arguments (instead of argv) + +=========================================== + Change owner/group: chown zas1024 somefile (change owner of somefile to zas1024) -- cgit v1.2.3 From a0f5aae35941d880f58f178c80bfe0b346d7e8af Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 30 Jan 2014 00:53:21 +0000 Subject: Most of the work is done towards running lmm.py from the command line and storing the results in redis --- misc/notes.txt | 4 +- wqflask/base/data_set.py | 2 +- .../wqflask/marker_regression/marker_regression.py | 130 +++++++++++++++++---- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 79 +++++++++++-- 4 files changed, 181 insertions(+), 34 deletions(-) mode change 100644 => 100755 wqflask/wqflask/my_pylmm/pyLMM/lmm.py (limited to 'misc') diff --git a/misc/notes.txt b/misc/notes.txt index fa2152d9..f8ce2759 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -273,7 +273,9 @@ grep -ir (search string) (directory) =========================================== -Use argparse to deal with command line arguments (instead of argv) +Command line arguments: + +Use argparse to deal with command line arguments (instead of argv or optparse) =========================================== diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 9fbccf80..3deaa655 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -184,7 +184,7 @@ class HumanMarkers(Markers): self.markers = [] for line in marker_data_fh: splat = line.strip().split() - print("splat:", splat) + #print("splat:", splat) marker = {} marker['chr'] = int(splat[0]) marker['name'] = splat[1] diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 5fa288df..1594d35d 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -7,16 +7,20 @@ from pprint import pformat as pf import string import sys +import datetime import os import collections +import uuid import numpy as np from scipy import linalg -import simplejson as json +import cPickle as pickle -#from redis import Redis +import simplejson as json +from redis import Redis +Redis = Redis() from base.trait import GeneralTrait from base import data_set @@ -38,7 +42,7 @@ class MarkerRegression(object): helper_functions.get_species_dataset_trait(self, start_vars) - tempdata = temp_data.TempData(temp_uuid) + #tempdata = temp_data.TempData(temp_uuid) self.samples = [] # Want only ones with values self.vals = [] @@ -48,7 +52,8 @@ class MarkerRegression(object): self.samples.append(str(sample)) self.vals.append(value) - self.qtl_results = self.gen_data(tempdata) + #self.qtl_results = self.gen_data(tempdata) + self.qtl_results = self.gen_data(str(temp_uuid)) #Get chromosome lengths for drawing the manhattan plot chromosome_mb_lengths = {} @@ -61,15 +66,23 @@ class MarkerRegression(object): ) - def gen_data(self, tempdata): + #def gen_data(self, tempdata): + def gen_data(self, temp_uuid): """Generates p-values for each marker""" self.dataset.group.get_markers() pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) + #lmm_uuid = str(uuid.uuid4()) + + key = "pylmm:input:" + temp_uuid + print("key is:", pf(key)) + #with Bench("Loading cache"): + # result = Redis.get(key) + if self.dataset.group.species == "human": - p_values, t_stats = self.gen_human_results(pheno_vector, tempdata) + p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid) else: genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] @@ -77,25 +90,65 @@ class MarkerRegression(object): trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) genotype_matrix = np.array(trimmed_genotype_data).T - + #print("pheno_vector: ", pf(pheno_vector)) #print("genotype_matrix: ", pf(genotype_matrix)) #print("genotype_matrix.shape: ", pf(genotype_matrix.shape)) + + #params = {"pheno_vector": pheno_vector, + # "genotype_matrix": genotype_matrix, + # "restricted_max_likelihood": True, + # "refit": False, + # "temp_data": tempdata} + + params = dict(pheno_vector = pheno_vector.tolist(), + genotype_matrix = genotype_matrix.tolist(), + restricted_max_likelihood = True, + refit = False, + temp_uuid = temp_uuid, + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + print("json_params:", json_params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + print("before printing command") + + command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key, + "non-human") + print("command is:", command) + print("after printing command") + + os.system(command) + + #t_stats, p_values = lmm.run(key) + #lmm.run(key) + + json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) + results = json.load(json_results) + t_stats = results['t_stats'] + p_values = results['p_values'] - t_stats, p_values = lmm.run( - pheno_vector, - genotype_matrix, - restricted_max_likelihood=True, - refit=False, - temp_data=tempdata - ) + print("p_values:", p_values) + + #t_stats, p_values = lmm.run( + # pheno_vector, + # genotype_matrix, + # restricted_max_likelihood=True, + # refit=False, + # temp_data=tempdata + #) #print("p_values:", p_values) - + self.dataset.group.markers.add_pvalues(p_values) return self.dataset.group.markers.markers - def gen_human_results(self, pheno_vector, tempdata): + #def gen_human_results(self, pheno_vector, tempdata): + def gen_human_results(self, pheno_vector, key, temp_uuid): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) plink_input = input.plink(file_base, type='b') @@ -106,16 +159,45 @@ class MarkerRegression(object): kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ") kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs))) - p_values, t_stats = lmm.run_human( - pheno_vector, - covariate_matrix, - input_file_name, - kinship_matrix, - loading_progress=tempdata - ) + print("Before creating params") + + params = dict(pheno_vector = pheno_vector.tolist(), + covariate_matrix = covariate_matrix.tolist(), + input_file_name = input_file_name, + kinship_matrix = kinship_matrix.tolist(), + refit = False, + temp_uuid = temp_uuid, + + # meta data + timestamp = datetime.datetime.isoformat(), + ) + + print("After creating params") + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) - return p_values, t_stats + print("Before creating the command") + command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py {} {}'.format(key, + "non-human") + + print("command is:", command) + + os.system(command) + + p_values, t_stats = lmm.run_human(key) + + #p_values, t_stats = lmm.run_human( + # pheno_vector, + # covariate_matrix, + # input_file_name, + # kinship_matrix, + # loading_progress=tempdata + # ) + + return p_values, t_stats def identify_empty_samples(self): no_val_samples = [] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py old mode 100644 new mode 100755 index 6d12991f..2560aa9e --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,6 +19,7 @@ from __future__ import absolute_import, print_function, division import sys import time +import argparse import uuid import numpy as np @@ -27,6 +28,8 @@ from scipy import optimize from scipy import stats import pdb +import simplejson as json + import gzip import zlib import datetime @@ -40,17 +43,24 @@ from utility import temp_data from wqflask.my_pylmm.pyLMM import chunks -import redis -Redis = redis.Redis() +from redis import Redis +Redis = Redis() #np.seterr('raise') +#def run_human(pheno_vector, +# covariate_matrix, +# plink_input_file, +# kinship_matrix, +# refit=False, +# loading_progress=None): + def run_human(pheno_vector, covariate_matrix, plink_input_file, kinship_matrix, refit=False, - loading_progress=None): + tempdata=None): v = np.isnan(pheno_vector) keep = True - v @@ -142,7 +152,7 @@ def run_human(pheno_vector, percent_complete = (float(count) / total_snps) * 100 #print("percent_complete: ", percent_complete) - loading_progress.store("percent_complete", percent_complete) + tempdata.store("percent_complete", percent_complete) #with Bench("actual association"): ps, ts = human_association(snp, @@ -218,11 +228,17 @@ def human_association(snp, return ps, ts -def run(pheno_vector, +#def run(pheno_vector, +# genotype_matrix, +# restricted_max_likelihood=True, +# refit=False, +# temp_data=None): + +def run_other(pheno_vector, genotype_matrix, restricted_max_likelihood=True, refit=False, - temp_data=None): + tempdata=None): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False @@ -232,8 +248,10 @@ def run(pheno_vector, """ + + print("In run_other") with Bench("Calculate Kinship"): - kinship_matrix = calculate_kinship(genotype_matrix, temp_data) + kinship_matrix = calculate_kinship(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -252,7 +270,7 @@ def run(pheno_vector, kinship_matrix, restricted_max_likelihood=True, refit=False, - temp_data=temp_data) + temp_data=tempdata) Bench().report() return t_stats, p_values @@ -677,3 +695,48 @@ class LMM: pl.xlabel("Heritability") pl.ylabel("Probability of data") pl.title(title) + + def main(): + parser = argparse.ArgumentParser(description='Run pyLMM') + parser.add_argument('-k', '--key') + + opts = parser.parse_args() + + key = opts.key + + json_params = Redis.get(key) + + params = json.loads(json_params) + print("params:", params) + + is_human = params['human'] + + tempdata = temp_data.TempData(params['temp_uuid']) + if is_human: + ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), + covariate_matrix = np.array(params['covariate_matrix']), + plink_input_file = params['input_file_name'], + kinship_matrix = np.array(params['kinship_matrix']), + refit = params['refit'], + tempdata = tempdata) + else: + ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']), + genotype_matrix = np.array(params['genotype_matrix']), + restricted_max_likelihood = params['restricted_max_likelihood'], + refit = params['refit'], + tempdata = tempdata) + + results_key = "pylmm:results:" + params['temp_uuid'] + + json_results = json.dumps(dict(p_values = ps, + t_stats = ts)) + + #Pushing json_results into a list where it is the only item because blpop needs a list + Redis.rpush(results_key, json_results) + Redis.expire(results_key, 60*60) + +if __name__ == '__main__': + main() + + + -- cgit v1.2.3