diff options
author | Pjotr Prins | 2015-04-02 13:40:42 +0200 |
---|---|---|
committer | Pjotr Prins | 2015-04-02 13:40:42 +0200 |
commit | b9c79ef58ff6ec4da3e65290ea802c783bb17742 (patch) | |
tree | be174d92bf2c5b819da63ea4a845f26cb87bd2a7 | |
parent | 5151bc389aa98415da9f4d49b3c279ed1380ea7d (diff) | |
download | genenetwork2-b9c79ef58ff6ec4da3e65290ea802c783bb17742.tar.gz |
Passing in an iterator
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 33 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 6 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 26 |
3 files changed, 57 insertions, 8 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8be3fc6f..07b55726 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -875,6 +875,9 @@ def gn2_main(): gn2_redis(key,species) def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): + """ + This function emulates current GN2 behaviour by pre-loading Redis + """ print("Loading Redis from parsed data") if kinship == None: k = None @@ -896,7 +899,35 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.expire(key, 60*60) return gn2_redis(key,species,new_code) - + +def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): + """ + This function emulates GN2 behaviour by pre-loading Redis with + a SNP iterator + """ + print("Loading Redis using a SNP iterator") + if kinship == None: + k = None + else: + k = kinship.tolist() + params = dict(pheno_vector = pheno.tolist(), + genotype_matrix = geno_iterator.tolist(), + kinship_matrix = k, + restricted_max_likelihood = True, + refit = False, + temp_uuid = "testrun_temp_uuid", + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + + return gn2_redis(key,species,new_code) + + if __name__ == '__main__': print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") if has_gn2: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 036bf7d5..3b0672b4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship_new +from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -104,11 +104,9 @@ if options.geno and cmd != 'iterator': print g.shape if cmd == 'iterator': - def snp_iterator(func): - tsvreader.geno_iter(options.geno,func) - if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') + snp_iterator = tsvreader.geno_iter(options.geno) ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) print np.array(ps) print len(ps),sum(ps) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index 7fe75e3f..27daf43f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -76,13 +76,12 @@ def geno(fn): def geno(fn): G1 = [] - def append(id,values): + for id,values in geno_iter(fn): G1.append(values) # <--- slow - geno_iter(fn,append) G = np.array(G1) return G -def geno_iter(fn,func): +def geno_callback(fn,func): hab_mapper = {'A':0,'H':1,'B':2,'-':3} pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] @@ -99,3 +98,24 @@ def geno_iter(fn,func): gs = list(row[1]) gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] func(id,gs2) + +def geno_iter(fn): + """ + Yield a tuple of snpid and values + """ + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + yield (id,gs2) |