input file is now loaded by pickle

author: Zachary Sloan 2013-04-18 21:04:09 +0000
committer: Zachary Sloan 2013-04-18 21:04:09 +0000
commit: a1c44dd7c11013da06dbd782dd0a0ebbde5cc995 (patch)
tree: 58c83ba12167c2a6f02751b3c87f054ff20e2421
parent: ea53a2f20d13130f3555967d57282b3c9562da5a (diff)
download: genenetwork2-a1c44dd7c11013da06dbd782dd0a0ebbde5cc995.tar.gz
3 files changed, 31 insertions, 19 deletions
diff --git a/misc/notes.txt b/misc/notes.txt
index e40dad8f..10a5729a 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -154,6 +154,13 @@ ll h*: Finds items in the directory starting with h
 
 ===========================================
 
+du -hms * | sort -n : Gives size used by different directories
+-h: human readable (overridden here by the later -m)
+-m: in megabytes (plain numbers, so sort -n orders them correctly)
+-s: summarize
+
+===========================================
+
 cp -a (archive; copies recursively and doesn't follow symbol links)
    -i (interactive, prompts before overwrite)
    -v (verbose)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 2ede5660..c3e9a934 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -98,6 +98,7 @@ class MarkerRegression(object):
         file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
 
         plink_input = input.plink(file_base, type='b')
+        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps")
 
         pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
         covariate_matrix = np.ones((pheno_vector.shape[0],1))
@@ -107,7 +108,7 @@ class MarkerRegression(object):
         p_values, t_stats = lmm.run_human(
                 pheno_vector,
                 covariate_matrix,
-                plink_input,
+                input_file_name,
                 kinship_matrix,
                 loading_progress=tempdata
             )
@@ -145,9 +146,8 @@ def create_snp_iterator_file(group):
     
     snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps")
     
-    with open(snp_file_base, "w") as fh:
+    with open(snp_file_base, "wb") as fh:
         pickle.dump(inputs, fh)
-    
 
 if __name__ == '__main__':
     import cPickle as pickle
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 918f8200..ab87e4f0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -27,7 +27,7 @@ from scipy import optimize
 from scipy import stats
 import pdb
 
-#import cPickle as pickle
+import cPickle as pickle
 import simplejson as json
 
 from pprint import pformat as pf
@@ -41,7 +41,7 @@ from wqflask.my_pylmm.pyLMM import chunks
 
 def run_human(pheno_vector,
             covariate_matrix,
-            plink_input,
+            plink_input_file,
             kinship_matrix,
             refit=False,
             loading_progress=None):
@@ -68,25 +68,30 @@ def run_human(pheno_vector,
     p_values = []
     t_stats = []
 
-    plink_input.getSNPIterator()
-    total_snps = plink_input.numSNPs
+    print("input_file: ", plink_input_file)
+
+    with open(plink_input_file, "rb") as input_file:
+        plink_input = pickle.load(input_file)
+
+    #plink_input.getSNPIterator()
+    #total_snps = plink_input.numSNPs
 
     with Bench("snp iterator loop"):
         count = 0
 
-        with Bench("Create list of inputs"):
-            inputs = list(plink_input)
-            
-        with Bench("Divide into chunks"):
-            results = chunks.divide_into_chunks(inputs, 64)
-            
-        result_store = []
-        identifier = uuid.uuid4()
-        for part, result in enumerate(results):
-            data_store = temp_data.TempData(identifier, part)
+        #with Bench("Create list of inputs"):
+        #    inputs = list(plink_input)
             
-            data_store.store(data=json.dumps(result.tolist()))
-            result_store.append(data_store)
+        #with Bench("Divide into chunks"):
+        #    results = chunks.divide_into_chunks(inputs, 64)
+        #    
+        #result_store = []
+        #identifier = uuid.uuid4()
+        #for part, result in enumerate(results):
+        #    data_store = temp_data.TempData(identifier, part)
+        #    
+        #    data_store.store(data=pickle.dumps(result))
+        #    result_store.append(data_store)
 
         for snp, this_id in plink_input:
             with Bench("part before association"):
author	Zachary Sloan	2013-04-18 21:04:09 +0000
committer	Zachary Sloan	2013-04-18 21:04:09 +0000
commit	a1c44dd7c11013da06dbd782dd0a0ebbde5cc995 (patch)
tree	58c83ba12167c2a6f02751b3c87f054ff20e2421
parent	ea53a2f20d13130f3555967d57282b3c9562da5a (diff)
download	genenetwork2-a1c44dd7c11013da06dbd782dd0a0ebbde5cc995.tar.gz