aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2015-04-02 13:40:42 +0200
committerPjotr Prins2015-04-02 13:40:42 +0200
commitb9c79ef58ff6ec4da3e65290ea802c783bb17742 (patch)
treebe174d92bf2c5b819da63ea4a845f26cb87bd2a7
parent5151bc389aa98415da9f4d49b3c279ed1380ea7d (diff)
downloadgenenetwork2-b9c79ef58ff6ec4da3e65290ea802c783bb17742.tar.gz
Passing in an iterator
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py33
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py6
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py26
3 files changed, 57 insertions, 8 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8be3fc6f..07b55726 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -875,6 +875,9 @@ def gn2_main():
gn2_redis(key,species)
def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
+ """
+ This function emulates current GN2 behaviour by pre-loading Redis
+ """
print("Loading Redis from parsed data")
if kinship == None:
k = None
@@ -896,7 +899,35 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
Redis.expire(key, 60*60)
return gn2_redis(key,species,new_code)
-
+
+def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
+ """
+ This function emulates GN2 behaviour by pre-loading Redis with
+ a SNP iterator
+ """
+ print("Loading Redis using a SNP iterator")
+ if kinship == None:
+ k = None
+ else:
+ k = kinship.tolist()
+ params = dict(pheno_vector = pheno.tolist(),
+ genotype_matrix = geno_iterator.tolist(),
+ kinship_matrix = k,
+ restricted_max_likelihood = True,
+ refit = False,
+ temp_uuid = "testrun_temp_uuid",
+
+ # meta data
+ timestamp = datetime.datetime.now().isoformat(),
+ )
+
+ json_params = json.dumps(params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
+
+ return gn2_redis(key,species,new_code)
+
+
if __name__ == '__main__':
print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
if has_gn2:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 036bf7d5..3b0672b4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_new
+from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new
from kinship import kinship, kinship_full
import genotype
import phenotype
@@ -104,11 +104,9 @@ if options.geno and cmd != 'iterator':
print g.shape
if cmd == 'iterator':
- def snp_iterator(func):
- tsvreader.geno_iter(options.geno,func)
-
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ snp_iterator = tsvreader.geno_iter(options.geno)
ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator)
print np.array(ps)
print len(ps),sum(ps)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index 7fe75e3f..27daf43f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -76,13 +76,12 @@ def geno(fn):
def geno(fn):
G1 = []
- def append(id,values):
+ for id,values in geno_iter(fn):
G1.append(values) # <--- slow
- geno_iter(fn,append)
G = np.array(G1)
return G
-def geno_iter(fn,func):
+def geno_callback(fn,func):
hab_mapper = {'A':0,'H':1,'B':2,'-':3}
pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
@@ -99,3 +98,24 @@ def geno_iter(fn,func):
gs = list(row[1])
gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
func(id,gs2)
+
+def geno_iter(fn):
+ """
+ Yield a tuple of snpid and values
+ """
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ yield (id,gs2)