diff options
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 229 |
1 files changed, 0 insertions, 229 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py deleted file mode 100644 index 6b241cd6..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ /dev/null @@ -1,229 +0,0 @@ -# This is the LMM runner that calls the possible methods using command line -# switches. It acts as a multiplexer where all the invocation complexity -# is kept outside the main LMM routines. -# -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -from optparse import OptionParser -import sys -import tsvreader -import numpy as np - -# Add local dir to PYTHONPATH -import os -cwd = os.path.dirname(__file__) -if sys.path[0] != cwd: - sys.path.insert(1,cwd) - -# pylmm modules -from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas -from kinship import kinship, kinship_full -import genotype -import phenotype -from standalone import uses - -progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') - -usage = """ -python runlmm.py [options] command - - runlmm.py processing multiplexer reads standardised input formats - and calls the different routines (writes to stdout) - - Current commands are: - - parse : only parse input files - redis : use Redis to call into GN2 - kinship : calculate (new) kinship matrix - - try --help for more information -""" - - -parser = OptionParser(usage=usage) -# parser.add_option("-f", "--file", dest="input file", -# help="In", metavar="FILE") -parser.add_option("--kinship",dest="kinship", - help="Kinship file format 1.0") -parser.add_option("--pheno",dest="pheno", - help="Phenotype file format 1.0") -parser.add_option("--geno",dest="geno", - help="Genotype file format 1.0") -parser.add_option("--maf-normalization", - action="store_true", dest="maf_normalization", default=False, - help="Apply MAF genotype normalization") -parser.add_option("--genotype-normalization", - action="store_true", dest="genotype_normalization", default=False, - help="Force genotype normalization") -parser.add_option("--remove-missing-phenotypes", - action="store_true", dest="remove_missing_phenotypes", default=False, - help="Remove missing phenotypes") -parser.add_option("-q", "--quiet", - action="store_false", dest="verbose", default=True, - help="don't print status messages to stdout") -parser.add_option("--blas", action="store_true", default=False, dest="useBLAS", help="Use BLAS instead of numpy matrix multiplication") -parser.add_option("-t", "--threads", - type="int", dest="numThreads", - help="Threads to use") -parser.add_option("--saveKvaKve", - action="store_true", dest="saveKvaKve", default=False, - help="Testing mode") -parser.add_option("--test", - action="store_true", dest="testing", default=False, - help="Testing mode") -parser.add_option("--test-kinship", - action="store_true", dest="test_kinship", default=False, - help="Testing mode for Kinship calculation") - -(options, args) = parser.parse_args() - -if len(args) != 1: - print usage - sys.exit(1) - -cmd = args[0] -print "Command: ",cmd - -k = None -y = None -g = None - -if options.kinship: - k = tsvreader.kinship(options.kinship) - print k.shape - -if options.pheno: - y = tsvreader.pheno(options.pheno) - print y.shape - -if options.geno and cmd != 'iterator': - g = tsvreader.geno(options.geno) - print g.shape - -def check_results(ps,ts): - print np.array(ps) - print len(ps),sum(ps) - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - if options.geno == 'data/small.geno': - info("Validating results for "+options.geno) - assert p1==0.7387, "p1=%f" % p1 - assert p2==0.7387, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - info("Validating results for "+options.geno) - assert p1==0.062, "p1=%f" % p1 - assert p2==0.062, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - info("Validating results for "+options.geno) - assert round(sum(ps)) == 4070 - assert len(ps) == 8000 - info("Run completed") - -if y is not None: - n = y.shape[0] - -if cmd == 'run': - if options.remove_missing_phenotypes: - raise Exception('Can not use --remove-missing-phenotypes with LMM2') - n = len(y) - m = g.shape[1] - ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP - check_results(ps,ts) -elif cmd == 'iterator': - if options.remove_missing_phenotypes: - raise Exception('Can not use --remove-missing-phenotypes with LMM2') - geno_iterator = tsvreader.geno_iter(options.geno) - ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator) - check_results(ps,ts) -elif cmd == 'redis_new': - # The main difference between redis_new and redis is that missing - # phenotypes are handled by the first - if options.remove_missing_phenotypes: - raise Exception('Can not use --remove-missing-phenotypes with LMM2') - Y = y - G = g - print "Original G",G.shape, "\n", G - # gt = G.T - # G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True) - check_results(ps,ts) -elif cmd == 'redis': - # Emulating the redis setup of GN2 - G = g - print "Original G",G.shape, "\n", G - if y is not None and options.remove_missing_phenotypes: - gnt = np.array(g).T - n,Y,g,keep = phenotype.remove_missing(n,y,gnt) - G = g.T - print "Removed missing phenotypes",G.shape, "\n", G - else: - Y = y - if options.maf_normalization: - G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) - print "MAF replacements: \n",G - if options.genotype_normalization: - G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - g = None - gnt = None - - # gt = G.T - # G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) - check_results(ps,ts) -elif cmd == 'kinship': - G = g - print "Original G",G.shape, "\n", G - if y != None and options.remove_missing_phenotypes: - gnt = np.array(g).T - n,Y,g,keep = phenotype.remove_missing(n,y,g.T) - G = g.T - print "Removed missing phenotypes",G.shape, "\n", G - if options.maf_normalization: - G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) - print "MAF replacements: \n",G - if options.genotype_normalization: - G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - g = None - gnt = None - - if options.test_kinship: - K = kinship_full(np.copy(G)) - print "Genotype",G.shape, "\n", G - print "first Kinship method",K.shape,"\n",K - k1 = round(K[0][0],4) - K2,G = calculate_kinship_new(np.copy(G)) - print "Genotype",G.shape, "\n", G - print "GN2 Kinship method",K2.shape,"\n",K2 - k2 = round(K2[0][0],4) - - print "Genotype",G.shape, "\n", G - K3 = kinship(G) - print "third Kinship method",K3.shape,"\n",K3 - sys.stderr.write(options.geno+"\n") - k3 = round(K3[0][0],4) - if options.geno == 'data/small.geno': - assert k1==0.8333, "k1=%f" % k1 - assert k2==0.9375, "k2=%f" % k2 - assert k3==0.9375, "k3=%f" % k3 - if options.geno == 'data/small_na.geno': - assert k1==0.8333, "k1=%f" % k1 - assert k2==0.7172, "k2=%f" % k2 - assert k3==0.7172, "k3=%f" % k3 - if options.geno == 'data/test8000.geno': - assert k3==1.4352, "k3=%f" % k3 - -else: - fatal("Doing nothing") |