aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZachary Sloan2013-04-17 23:49:47 +0000
committerZachary Sloan2013-04-17 23:49:47 +0000
commitea53a2f20d13130f3555967d57282b3c9562da5a (patch)
tree6bf929eb232b957867d62e31fee2dc7aff49f522
parent296d7dec13a57519e64e99ab7c3a4673447c026f (diff)
downloadgenenetwork2-ea53a2f20d13130f3555967d57282b3c9562da5a.tar.gz
Created file with pickled SNPIterator (from input.py) data
for HLC datasets Still need to read in file
-rwxr-xr-xwqflask/base/webqtlConfig.py1
-rw-r--r--wqflask/utility/temp_data.py6
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py48
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py27
4 files changed, 50 insertions, 32 deletions
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 1845c749..49afb631 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -53,6 +53,7 @@ SECUREDIR = GNROOT + 'secure/'
COMMON_LIB = GNROOT + 'support/admin'
HTMLPATH = GNROOT + 'web/'
PYLMM_PATH = HTMLPATH + 'plink/'
+SNP_PATH = '/mnt/xvdf1/snps/'
IMGDIR = HTMLPATH +'image/'
IMAGESPATH = HTMLPATH + 'images/'
UPLOADPATH = IMAGESPATH + 'upload/'
diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py
index 004d45c6..ddf2653c 100644
--- a/wqflask/utility/temp_data.py
+++ b/wqflask/utility/temp_data.py
@@ -5,14 +5,16 @@ import simplejson as json
class TempData(object):
- def __init__(self, temp_uuid):
+ def __init__(self, temp_uuid, part=None):
self.temp_uuid = temp_uuid
self.redis = Redis()
self.key = "tempdata:{}".format(self.temp_uuid)
+ if part:
+ self.key += ":{}".format(part)
def store(self, field, value):
self.redis.hset(self.key, field, value)
- self.redis.expire(self.key, 60*15) # Expire in 15 minutes
+ self.redis.expire(self.key, 60*60) # Expire in 60 minutes
def get_all(self):
return self.redis.hgetall(self.key)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 86d9fe06..2ede5660 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -9,11 +9,12 @@ import string
import sys
import os
import collections
-import pdb
import numpy as np
from scipy import linalg
+import simplejson as json
+
#from redis import Redis
@@ -41,8 +42,6 @@ class MarkerRegression(object):
self.samples = [] # Want only ones with values
self.vals = []
- print("start_vars: ", pf(start_vars))
- self.suggestive = float(start_vars['suggestive'])
for sample in self.dataset.group.samplelist:
value = start_vars['value:' + sample]
@@ -52,13 +51,12 @@ class MarkerRegression(object):
self.gen_data(tempdata)
#Get chromosome lengths for drawing the manhattan plot
- chromosomes = {}
+ chromosome_mb_lengths = {}
for key in self.species.chromosomes.chromosomes.keys():
- this_chr = self.species.chromosomes.chromosomes[key]
- chromosomes[key] = [this_chr.name, this_chr.mb_length]
+ chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
self.js_data = dict(
- chromosomes = chromosomes,
+ chromosomes = chromosome_mb_lengths,
qtl_results = self.qtl_results,
)
@@ -74,10 +72,10 @@ class MarkerRegression(object):
p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
else:
genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
+
no_val_samples = self.identify_empty_samples()
- trimmed_genotype_data = self.trim_genotypes(genotype_data, no_value_samples=[])
- pdb.set_trace()
+ trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+
genotype_matrix = np.array(trimmed_genotype_data).T
print("pheno_vector is: ", pf(pheno_vector))
@@ -90,23 +88,16 @@ class MarkerRegression(object):
refit=False,
temp_data=tempdata
)
-
+
self.dataset.group.markers.add_pvalues(p_values)
- self.qtl_results = []
- for marker in self.dataset.group.markers.markers:
- if marker['lod_score'] >= self.suggestive:
- self.qtl_results.append(marker)
-
- #self.qtl_results = self.dataset.group.markers.markers
+ self.qtl_results = self.dataset.group.markers.markers
def gen_human_results(self, pheno_vector, tempdata):
file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
-
- tempdata.store("percent_complete", 0)
+
plink_input = input.plink(file_base, type='b')
- tempdata.store("percent_complete", 0.1)
pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
covariate_matrix = np.ones((pheno_vector.shape[0],1))
@@ -118,11 +109,11 @@ class MarkerRegression(object):
covariate_matrix,
plink_input,
kinship_matrix,
- temp_data=tempdata
+ loading_progress=tempdata
)
return p_values, t_stats
-
+
def identify_empty_samples(self):
no_val_samples = []
@@ -147,4 +138,17 @@ class MarkerRegression(object):
trimmed_genotype_data.append(new_genotypes)
return trimmed_genotype_data
+def create_snp_iterator_file(group):
+ plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
+ plink_input = input.plink(plink_file_base, type='b')
+ inputs = list(plink_input)
+ snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps")
+
+ with open(snp_file_base, "w") as fh:
+ pickle.dump(inputs, fh)
+
+
+if __name__ == '__main__':
+ import cPickle as pickle
+ create_snp_iterator_file("HLC")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 24565af8..918f8200 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,15 +19,21 @@ from __future__ import absolute_import, print_function, division
import sys
import time
+import uuid
+
import numpy as np
from scipy import linalg
from scipy import optimize
from scipy import stats
import pdb
+#import cPickle as pickle
+import simplejson as json
+
from pprint import pformat as pf
from utility.benchmark import Bench
+from utility import temp_data
from wqflask.my_pylmm.pyLMM import chunks
@@ -38,7 +44,7 @@ def run_human(pheno_vector,
plink_input,
kinship_matrix,
refit=False,
- temp_data=None):
+ loading_progress=None):
v = np.isnan(pheno_vector)
keep = True - v
@@ -65,27 +71,32 @@ def run_human(pheno_vector,
plink_input.getSNPIterator()
total_snps = plink_input.numSNPs
- number_chunks = 63
-
with Bench("snp iterator loop"):
count = 0
-
-
+
with Bench("Create list of inputs"):
inputs = list(plink_input)
with Bench("Divide into chunks"):
- results = chunks.divide_into_chunks(inputs, 63)
+ results = chunks.divide_into_chunks(inputs, 64)
+ result_store = []
+ identifier = uuid.uuid4()
+ for part, result in enumerate(results):
+ data_store = temp_data.TempData(identifier, part)
+
+ data_store.store(data=json.dumps(result.tolist()))
+ result_store.append(data_store)
+
for snp, this_id in plink_input:
with Bench("part before association"):
- if count > 500:
+ if count > 2000:
break
count += 1
percent_complete = (float(count) / total_snps) * 100
#print("percent_complete: ", percent_complete)
- temp_data.store("percent_complete", percent_complete)
+ loading_progress.store("percent_complete", percent_complete)
with Bench("actual association"):
ps, ts = human_association(snp,