about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2015-04-02 13:40:42 +0200
committerPjotr Prins2015-04-02 13:40:42 +0200
commitb9c79ef58ff6ec4da3e65290ea802c783bb17742 (patch)
treebe174d92bf2c5b819da63ea4a845f26cb87bd2a7
parent5151bc389aa98415da9f4d49b3c279ed1380ea7d (diff)
downloadgenenetwork2-b9c79ef58ff6ec4da3e65290ea802c783bb17742.tar.gz
Passing in an iterator
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py33
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py6
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py26
3 files changed, 57 insertions, 8 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8be3fc6f..07b55726 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -875,6 +875,9 @@ def gn2_main():
     gn2_redis(key,species)
 
 def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
+    """
+    This function emulates current GN2 behaviour by pre-loading Redis
+    """
     print("Loading Redis from parsed data")
     if kinship == None:
         k = None
@@ -896,7 +899,35 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
     Redis.expire(key, 60*60)
 
     return gn2_redis(key,species,new_code)
-    
+
+def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
+    """
+    This function emulates GN2 behaviour by pre-loading Redis with
+    a SNP iterator
+    """
+    print("Loading Redis using a SNP iterator")
+    if kinship == None:
+        k = None
+    else:
+        k = kinship.tolist()
+    params = dict(pheno_vector = pheno.tolist(),
+                  genotype_matrix = geno_iterator.tolist(),
+                  kinship_matrix = k,
+                  restricted_max_likelihood = True,
+                  refit = False,
+                  temp_uuid = "testrun_temp_uuid",
+                        
+                  # meta data
+                  timestamp = datetime.datetime.now().isoformat(),
+    )
+            
+    json_params = json.dumps(params)
+    Redis.set(key, json_params)
+    Redis.expire(key, 60*60)
+
+    return gn2_redis(key,species,new_code)
+
+
 if __name__ == '__main__':
     print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
     if has_gn2:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 036bf7d5..3b0672b4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
 import sys
 import tsvreader
 import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_new
+from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new
 from kinship import kinship, kinship_full
 import genotype
 import phenotype
@@ -104,11 +104,9 @@ if options.geno and cmd != 'iterator':
     print g.shape
 
 if cmd == 'iterator':
-    def snp_iterator(func):
-        tsvreader.geno_iter(options.geno,func)
-        
     if options.remove_missing_phenotypes:
         raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+    snp_iterator =  tsvreader.geno_iter(options.geno)
     ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator)
     print np.array(ps)
     print len(ps),sum(ps)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index 7fe75e3f..27daf43f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -76,13 +76,12 @@ def geno(fn):
 
 def geno(fn):
     G1 = []
-    def append(id,values):
+    for id,values in geno_iter(fn):
         G1.append(values) # <--- slow
-    geno_iter(fn,append)
     G = np.array(G1)
     return G
 
-def geno_iter(fn,func):
+def geno_callback(fn,func):
     hab_mapper = {'A':0,'H':1,'B':2,'-':3}
     pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
 
@@ -99,3 +98,24 @@ def geno_iter(fn,func):
             gs = list(row[1])
             gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
             func(id,gs2) 
+
+def geno_iter(fn):
+    """
+    Yield a tuple of snpid and values
+    """
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+    print fn
+    with open(fn,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            id = row[0]
+            gs = list(row[1])
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            yield (id,gs2)