Created file with pickled SNPIterator (from input.py) data

for HLC datasets Still need to read in file
author: Zachary Sloan 2013-04-17 23:49:47 +0000
committer: Zachary Sloan 2013-04-17 23:49:47 +0000
commit: ea53a2f20d13130f3555967d57282b3c9562da5a (patch)
tree: 6bf929eb232b957867d62e31fee2dc7aff49f522 /wqflask
parent: 296d7dec13a57519e64e99ab7c3a4673447c026f (diff)
download: genenetwork2-ea53a2f20d13130f3555967d57282b3c9562da5a.tar.gz
4 files changed, 50 insertions, 32 deletions
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 1845c749..49afb631 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -53,6 +53,7 @@ SECUREDIR = GNROOT + 'secure/'
 COMMON_LIB = GNROOT + 'support/admin'
 HTMLPATH = GNROOT + 'web/'
 PYLMM_PATH = HTMLPATH + 'plink/'
+SNP_PATH = '/mnt/xvdf1/snps/' 
 IMGDIR = HTMLPATH +'image/'
 IMAGESPATH = HTMLPATH + 'images/'
 UPLOADPATH = IMAGESPATH + 'upload/'
diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py
index 004d45c6..ddf2653c 100644
--- a/wqflask/utility/temp_data.py
+++ b/wqflask/utility/temp_data.py
@@ -5,14 +5,16 @@ import simplejson as json
 
 class TempData(object):
     
-    def __init__(self, temp_uuid):
+    def __init__(self, temp_uuid, part=None):
         self.temp_uuid = temp_uuid
         self.redis = Redis()
         self.key = "tempdata:{}".format(self.temp_uuid)
+        if part:
+            self.key += ":{}".format(part)
         
     def store(self, field, value):
         self.redis.hset(self.key, field, value)
-        self.redis.expire(self.key, 60*15)  # Expire in 15 minutes
+        self.redis.expire(self.key, 60*60)  # Expire in 60 minutes
         
     def get_all(self):
         return self.redis.hgetall(self.key)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 86d9fe06..2ede5660 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -9,11 +9,12 @@ import string
 import sys
 import os
 import collections
-import pdb
 
 import numpy as np
 from scipy import linalg
 
+import simplejson as json
+
 #from redis import Redis
 
 
@@ -41,8 +42,6 @@ class MarkerRegression(object):
         
         self.samples = [] # Want only ones with values
         self.vals = []
-        print("start_vars: ", pf(start_vars))
-        self.suggestive = float(start_vars['suggestive'])
 
         for sample in self.dataset.group.samplelist:
             value = start_vars['value:' + sample]
@@ -52,13 +51,12 @@ class MarkerRegression(object):
         self.gen_data(tempdata)
 
         #Get chromosome lengths for drawing the manhattan plot
-        chromosomes = {}
+        chromosome_mb_lengths = {}
         for key in self.species.chromosomes.chromosomes.keys():
-            this_chr = self.species.chromosomes.chromosomes[key]
-            chromosomes[key] = [this_chr.name, this_chr.mb_length]
+            chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
         
         self.js_data = dict(
-            chromosomes = chromosomes,
+            chromosomes = chromosome_mb_lengths,
             qtl_results = self.qtl_results,
         )
 
@@ -74,10 +72,10 @@ class MarkerRegression(object):
             p_values, t_stats = self.gen_human_results(pheno_vector, tempdata)
         else:
             genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
+            
             no_val_samples = self.identify_empty_samples()
-            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_value_samples=[])
-            pdb.set_trace()
+            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+            
             genotype_matrix = np.array(trimmed_genotype_data).T
             
             print("pheno_vector is: ", pf(pheno_vector))
@@ -90,23 +88,16 @@ class MarkerRegression(object):
                 refit=False,
                 temp_data=tempdata
             )
-
+        
         self.dataset.group.markers.add_pvalues(p_values)
 
-        self.qtl_results = []
-        for marker in self.dataset.group.markers.markers:
-            if marker['lod_score'] >= self.suggestive:
-                self.qtl_results.append(marker)
-        
-        #self.qtl_results = self.dataset.group.markers.markers
+        self.qtl_results = self.dataset.group.markers.markers
 
 
     def gen_human_results(self, pheno_vector, tempdata):
         file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
-        
-        tempdata.store("percent_complete", 0)
+
         plink_input = input.plink(file_base, type='b')
-        tempdata.store("percent_complete", 0.1)
 
         pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
         covariate_matrix = np.ones((pheno_vector.shape[0],1))
@@ -118,11 +109,11 @@ class MarkerRegression(object):
                 covariate_matrix,
                 plink_input,
                 kinship_matrix,
-                temp_data=tempdata
+                loading_progress=tempdata
             )
 
         return p_values, t_stats
-    
+
 
     def identify_empty_samples(self):
         no_val_samples = []
@@ -147,4 +138,17 @@ class MarkerRegression(object):
             trimmed_genotype_data.append(new_genotypes)
         return trimmed_genotype_data
     
+def create_snp_iterator_file(group):
+    plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
+    plink_input = input.plink(plink_file_base, type='b')
+    inputs = list(plink_input)
     
+    snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps")
+    
+    with open(snp_file_base, "w") as fh:
+        pickle.dump(inputs, fh)
+    
+
+if __name__ == '__main__':
+    import cPickle as pickle
+    create_snp_iterator_file("HLC")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 24565af8..918f8200 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,15 +19,21 @@ from __future__ import absolute_import, print_function, division
 
 import sys
 import time
+import uuid
+
 import numpy as np
 from scipy import linalg
 from scipy import optimize
 from scipy import stats
 import pdb
 
+#import cPickle as pickle
+import simplejson as json
+
 from pprint import pformat as pf
 
 from utility.benchmark import Bench
+from utility import temp_data
 
 from wqflask.my_pylmm.pyLMM import chunks
 
@@ -38,7 +44,7 @@ def run_human(pheno_vector,
             plink_input,
             kinship_matrix,
             refit=False,
-            temp_data=None):
+            loading_progress=None):
 
     v = np.isnan(pheno_vector)
     keep = True - v
@@ -65,27 +71,32 @@ def run_human(pheno_vector,
     plink_input.getSNPIterator()
     total_snps = plink_input.numSNPs
 
-    number_chunks = 63
-
     with Bench("snp iterator loop"):
         count = 0
-        
-            
+
         with Bench("Create list of inputs"):
             inputs = list(plink_input)
             
         with Bench("Divide into chunks"):
-            results = chunks.divide_into_chunks(inputs, 63)
+            results = chunks.divide_into_chunks(inputs, 64)
             
+        result_store = []
+        identifier = uuid.uuid4()
+        for part, result in enumerate(results):
+            data_store = temp_data.TempData(identifier, part)
+            
+            data_store.store(data=json.dumps(result.tolist()))
+            result_store.append(data_store)
+
         for snp, this_id in plink_input:
             with Bench("part before association"):
-                if count > 500:
+                if count > 2000:
                     break
                 count += 1
                 
                 percent_complete = (float(count) / total_snps) * 100
                 #print("percent_complete: ", percent_complete)
-                temp_data.store("percent_complete", percent_complete)
+                loading_progress.store("percent_complete", percent_complete)
         
             with Bench("actual association"):
                 ps, ts = human_association(snp,
author	Zachary Sloan	2013-04-17 23:49:47 +0000
committer	Zachary Sloan	2013-04-17 23:49:47 +0000
commit	ea53a2f20d13130f3555967d57282b3c9562da5a (patch)
tree	6bf929eb232b957867d62e31fee2dc7aff49f522 /wqflask
parent	296d7dec13a57519e64e99ab7c3a4673447c026f (diff)
download	genenetwork2-ea53a2f20d13130f3555967d57282b3c9562da5a.tar.gz