about summary refs log tree commit diff
path: root/wqflask
diff options
context:
space:
mode:
author Pjotr Prins 2015-04-02 12:04:14 +0200
committer Pjotr Prins 2015-04-02 12:04:14 +0200
commit 43295e57621e9a08ca4cb90e95cc14a87e0d8b5e (patch)
tree 0d37a9e547c4088e5ce355e61400424f3e90c5a1 /wqflask
parent 0f132d0cc4a77e69ab593fd9c8a2d5218d083ed7 (diff)
download genenetwork2-43295e57621e9a08ca4cb90e95cc14a87e0d8b5e.tar.gz
Create test geno iterator
Diffstat (limited to 'wqflask')
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py 9
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py 25
2 files changed, 32 insertions, 2 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ef0bdd7e..5a4bd268 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -99,11 +99,16 @@ if options.pheno:
y = tsvreader.pheno(options.pheno)
print y.shape
-if options.geno:
+if options.geno and cmd != 'iterator':
g = tsvreader.geno(options.geno)
print g.shape
-if cmd == 'redis_new':
+if cmd == 'iterator':
+ print "ITERATE over SNPs"
+ def pretty(snpid,values):
+ print snpid,values
+ print tsvreader.geno_iter(options.geno,pretty)
+elif cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
# phenotypes are handled by the first
if options.remove_missing_phenotypes:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..7fe75e3f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -74,3 +74,28 @@ def geno(fn):
G = np.array(G1)
return G
+def geno(fn):
+ G1 = []
+ def append(id,values):
+ G1.append(values) # <--- slow
+ geno_iter(fn,append)
+ G = np.array(G1)
+ return G
+
+def geno_iter(fn,func):
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ func(id,gs2)