diff options
| author | Zachary Sloan | 2013-04-17 23:49:47 +0000 | 
|---|---|---|
| committer | Zachary Sloan | 2013-04-17 23:49:47 +0000 | 
| commit | ea53a2f20d13130f3555967d57282b3c9562da5a (patch) | |
| tree | 6bf929eb232b957867d62e31fee2dc7aff49f522 | |
| parent | 296d7dec13a57519e64e99ab7c3a4673447c026f (diff) | |
| download | genenetwork2-ea53a2f20d13130f3555967d57282b3c9562da5a.tar.gz | |
Created file with pickled SNPIterator (from input.py) data
for HLC datasets Still need to read in file
| -rwxr-xr-x | wqflask/base/webqtlConfig.py | 1 | ||||
| -rw-r--r-- | wqflask/utility/temp_data.py | 6 | ||||
| -rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 48 | ||||
| -rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 27 | 
4 files changed, 50 insertions, 32 deletions
| diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 1845c749..49afb631 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -53,6 +53,7 @@ SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'web/' PYLMM_PATH = HTMLPATH + 'plink/' +SNP_PATH = '/mnt/xvdf1/snps/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py index 004d45c6..ddf2653c 100644 --- a/wqflask/utility/temp_data.py +++ b/wqflask/utility/temp_data.py @@ -5,14 +5,16 @@ import simplejson as json class TempData(object): - def __init__(self, temp_uuid): + def __init__(self, temp_uuid, part=None): self.temp_uuid = temp_uuid self.redis = Redis() self.key = "tempdata:{}".format(self.temp_uuid) + if part: + self.key += ":{}".format(part) def store(self, field, value): self.redis.hset(self.key, field, value) - self.redis.expire(self.key, 60*15) # Expire in 15 minutes + self.redis.expire(self.key, 60*60) # Expire in 60 minutes def get_all(self): return self.redis.hgetall(self.key) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 86d9fe06..2ede5660 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -9,11 +9,12 @@ import string import sys import os import collections -import pdb import numpy as np from scipy import linalg +import simplejson as json + #from redis import Redis @@ -41,8 +42,6 @@ class MarkerRegression(object): self.samples = [] # Want only ones with values self.vals = [] - print("start_vars: ", pf(start_vars)) - self.suggestive = float(start_vars['suggestive']) for sample in self.dataset.group.samplelist: value = start_vars['value:' + sample] @@ -52,13 +51,12 @@ class MarkerRegression(object): self.gen_data(tempdata) #Get chromosome lengths for drawing the manhattan plot - chromosomes = {} + chromosome_mb_lengths = {} for key in self.species.chromosomes.chromosomes.keys(): - this_chr = self.species.chromosomes.chromosomes[key] - chromosomes[key] = [this_chr.name, this_chr.mb_length] + chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length self.js_data = dict( - chromosomes = chromosomes, + chromosomes = chromosome_mb_lengths, qtl_results = self.qtl_results, ) @@ -74,10 +72,10 @@ class MarkerRegression(object): p_values, t_stats = self.gen_human_results(pheno_vector, tempdata) else: genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - + no_val_samples = self.identify_empty_samples() - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_value_samples=[]) - pdb.set_trace() + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + genotype_matrix = np.array(trimmed_genotype_data).T print("pheno_vector is: ", pf(pheno_vector)) @@ -90,23 +88,16 @@ class MarkerRegression(object): refit=False, temp_data=tempdata ) - + self.dataset.group.markers.add_pvalues(p_values) - self.qtl_results = [] - for marker in self.dataset.group.markers.markers: - if marker['lod_score'] >= self.suggestive: - self.qtl_results.append(marker) - - #self.qtl_results = self.dataset.group.markers.markers + self.qtl_results = self.dataset.group.markers.markers def gen_human_results(self, pheno_vector, tempdata): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) - - tempdata.store("percent_complete", 0) + plink_input = input.plink(file_base, type='b') - tempdata.store("percent_complete", 0.1) pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) covariate_matrix = np.ones((pheno_vector.shape[0],1)) @@ -118,11 +109,11 @@ class MarkerRegression(object): covariate_matrix, plink_input, kinship_matrix, - temp_data=tempdata + loading_progress=tempdata ) return p_values, t_stats - + def identify_empty_samples(self): no_val_samples = [] @@ -147,4 +138,17 @@ class MarkerRegression(object): trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data +def create_snp_iterator_file(group): + plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group) + plink_input = input.plink(plink_file_base, type='b') + inputs = list(plink_input) + snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps") + + with open(snp_file_base, "w") as fh: + pickle.dump(inputs, fh) + + +if __name__ == '__main__': + import cPickle as pickle + create_snp_iterator_file("HLC") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 24565af8..918f8200 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,15 +19,21 @@ from __future__ import absolute_import, print_function, division import sys import time +import uuid + import numpy as np from scipy import linalg from scipy import optimize from scipy import stats import pdb +#import cPickle as pickle +import simplejson as json + from pprint import pformat as pf from utility.benchmark import Bench +from utility import temp_data from wqflask.my_pylmm.pyLMM import chunks @@ -38,7 +44,7 @@ def run_human(pheno_vector, plink_input, kinship_matrix, refit=False, - temp_data=None): + loading_progress=None): v = np.isnan(pheno_vector) keep = True - v @@ -65,27 +71,32 @@ def run_human(pheno_vector, plink_input.getSNPIterator() total_snps = plink_input.numSNPs - number_chunks = 63 - with Bench("snp iterator loop"): count = 0 - - + with Bench("Create list of inputs"): inputs = list(plink_input) with Bench("Divide into chunks"): - results = chunks.divide_into_chunks(inputs, 63) + results = chunks.divide_into_chunks(inputs, 64) + result_store = [] + identifier = uuid.uuid4() + for part, result in enumerate(results): + data_store = temp_data.TempData(identifier, part) + + data_store.store(data=json.dumps(result.tolist())) + result_store.append(data_store) + for snp, this_id in plink_input: with Bench("part before association"): - if count > 500: + if count > 2000: break count += 1 percent_complete = (float(count) / total_snps) * 100 #print("percent_complete: ", percent_complete) - temp_data.store("percent_complete", percent_complete) + loading_progress.store("percent_complete", percent_complete) with Bench("actual association"): ps, ts = human_association(snp, | 
