diff options
author | Pjotr Prins | 2015-04-02 12:04:14 +0200 |
---|---|---|
committer | Pjotr Prins | 2015-04-02 12:04:14 +0200 |
commit | 43295e57621e9a08ca4cb90e95cc14a87e0d8b5e (patch) | |
tree | 0d37a9e547c4088e5ce355e61400424f3e90c5a1 /wqflask | |
parent | 0f132d0cc4a77e69ab593fd9c8a2d5218d083ed7 (diff) | |
download | genenetwork2-43295e57621e9a08ca4cb90e95cc14a87e0d8b5e.tar.gz |
Create test geno iterator
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 9 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 25 |
2 files changed, 32 insertions, 2 deletions
```diff
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ef0bdd7e..5a4bd268 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -99,11 +99,16 @@ if options.pheno:
     y = tsvreader.pheno(options.pheno)
     print y.shape
 
-if options.geno:
+if options.geno and cmd != 'iterator':
     g = tsvreader.geno(options.geno)
     print g.shape
 
-if cmd == 'redis_new':
+if cmd == 'iterator':
+    print "ITERATE over SNPs"
+    def pretty(snpid,values):
+        print snpid,values
+    print tsvreader.geno_iter(options.geno,pretty)
+elif cmd == 'redis_new':
     # The main difference between redis_new and redis is that missing
     # phenotypes are handled by the first
     if options.remove_missing_phenotypes:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..7fe75e3f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -74,3 +74,28 @@ def geno(fn):
     G = np.array(G1)
     return G
 
+def geno(fn):
+    G1 = []
+    def append(id,values):
+        G1.append(values) # <--- slow
+    geno_iter(fn,append)
+    G = np.array(G1)
+    return G
+
+def geno_iter(fn,func):
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+    print fn
+    with open(fn,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            id = row[0]
+            gs = list(row[1])
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            func(id,gs2)
```