diff options
15 files changed, 564 insertions, 55 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index e69de29b..e69de29b 100755..100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py b/wqflask/wqflask/my_pylmm/pyLMM/chunks.py index abeffee7..abeffee7 100755..100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/chunks.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py new file mode 100644 index 00000000..3b6b5d70 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py @@ -0,0 +1,178 @@ +# This is a converter for common LMM formats, so as to keep complexity +# outside the main routines. + +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from __future__ import print_function +from optparse import OptionParser +import sys +import os +import numpy as np +# from lmm import LMM, run_other +# import input +import plink + +usage = """ +python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--pheno pname] [--geno gname] + + Convert files for runlmm.py processing. Writes to stdout by default. + + try --help for more information +""" + +# if len(args) == 0: +# print usage +# sys.exit(1) + +option_parser = OptionParser(usage=usage) +option_parser.add_option("--kinship", dest="kinship", + help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program") +option_parser.add_option("--pheno", dest="pheno", + help="Parse a phenotype file (use with --plink only)") +option_parser.add_option("--geno", dest="geno", + help="Parse a genotype file (use with --plink only)") +option_parser.add_option("--plink", dest="plink", action="store_true", default=False, + help="Parse PLINK style") +# option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True, +# help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.") +option_parser.add_option("--prefix", dest="prefix", + help="Output prefix for output file(s)") +option_parser.add_option("-q", "--quiet", + action="store_false", dest="verbose", default=True, + help="don't print status messages to stdout") +option_parser.add_option("-v", "--verbose", + action="store_true", dest="verbose", default=False, + help="Print extra info") + +(options, args) = option_parser.parse_args() + +writer = None +num_inds = None +snp_names = [] +ind_names = [] + +def msg(s): + sys.stderr.write("INFO: ") + sys.stderr.write(s) + sys.stderr.write("\n") + +def wr(s): + if writer: + writer.write(s) + else: + sys.stdout.write(s) + +def wrln(s): + wr(s) + wr("\n") + + +if options.pheno: + if not options.plink: + raise Exception("Use --plink switch") + # Because plink does not track size we need to read the whole thing first + msg("Converting pheno "+options.pheno) + phenos = [] + count = 0 + count_pheno = None + for line in open(options.pheno,'r'): + count += 1 + list = line.split() + pcount = len(list)-2 + assert(pcount > 0) + if count_pheno == None: + count_pheno = pcount + assert(count_pheno == pcount) + row = [list[0]]+list[2:] + phenos.append(row) + + writer = None + if options.prefix: + writer = open(options.prefix+".pheno","w") + wrln("# Phenotype format version 1.0") + wrln("# Individuals = "+str(count)) + wrln("# Phenotypes = "+str(count_pheno)) + for i in range(count_pheno): + wr("\t"+str(i+1)) + wr("\n") + for i in range(count): + wr("\t".join(phenos[i])) + wr("\n") + num_inds = count + msg(str(count)+" pheno lines written") + +if options.kinship: + is_header = True + count = 0 + msg("Converting kinship "+options.kinship) + writer = None + if options.prefix: + writer = open(options.prefix+".kin","w") + for line in open(options.kinship,'r'): + count += 1 + if is_header: + size = len(line.split()) + wrln("# Kinship format version 1.0") + wrln("# Size="+str(size)) + for i in range(size): + wr("\t"+str(i+1)) + wr("\n") + is_header = False + wr(str(count)) + wr("\t") + wr("\t".join(line.split())) + wr("\n") + num_inds = count + msg(str(count)+" kinship lines written") + +if options.geno: + msg("Converting geno "+options.geno+'.bed') + if not options.plink: + raise Exception("Use --plink switch") + if not num_inds: + raise Exception("Can not figure out the number of individuals, use --pheno or --kinship") + bim_snps = plink.readbim(options.geno+'.bim') + num_snps = len(bim_snps) + writer = None + if options.prefix: + writer = open(options.prefix+".geno","w") + wrln("# Genotype format version 1.0") + wrln("# Individuals = "+str(num_inds)) + wrln("# SNPs = "+str(num_snps)) + wrln("# Encoding = HAB") + for i in range(num_inds): + wr("\t"+str(i+1)) + wr("\n") + + m = [] + def out(i,x): + # wr(str(i)+"\t") + # wr("\t".join(x)) + # wr("\n") + m.append(x) + + snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','-'), out) + + msg("Write transposed genotype matrix") + for g in range(num_snps): + wr(bim_snps[g][1]+"\t") + for i in range(num_inds): + wr(m[g][i]) + wr("\n") + + msg(str(count)+" geno lines written (with "+str(snps)+" snps)") + +msg("Converting done") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py index dfe28a4e..f7b556a5 100755..100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/input.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py @@ -133,13 +133,15 @@ class plink: return G def normalizeGenotype(self,G): + # print "Before",G + # print G.shape x = True - np.isnan(G) m = G[x].mean() s = np.sqrt(G[x].var()) G[np.isnan(G)] = m if s == 0: G = G - m else: G = (G - m) / s - + # print "After",G return G def getPhenos(self,phenoFile=None): @@ -260,4 +262,4 @@ class plink: L.append(D[self.indivs[i]]) P = P[L,:] - return P
\ No newline at end of file + return P diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 369fcbcc..58d7593d 100755..100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -45,11 +45,17 @@ import sys sys.path.append("/home/zas1024/gene/wqflask/") print("sys.path2:", sys.path) +has_gn2=True + from utility.benchmark import Bench from utility import temp_data -from wqflask.my_pylmm.pyLMM import chunks - +try: + from wqflask.my_pylmm.pyLMM import chunks +except ImportError: + print("WARNING: Standalone version missing the Genenetwork2 environment\n") + has_gn2=False + pass #np.seterr('raise') @@ -248,20 +254,22 @@ def run_other(pheno_vector, genotype_matrix, restricted_max_likelihood=True, refit=False, - tempdata=None): + tempdata=None, # <---- can not be None + is_testing=False): + """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) + calculations ("calculate_kinship" and "GWAS" take the majority of time) """ - print("In run_other") + print("REML=",restricted_max_likelihood," REFIT=",refit, "TESTING=",is_testing) with Bench("Calculate Kinship"): - kinship_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix = calculate_kinship(genotype_matrix, tempdata, is_testing) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -273,6 +281,7 @@ def run_other(pheno_vector, lmm_ob.fit() print("genotype_matrix: ", genotype_matrix.shape) + print(genotype_matrix) with Bench("Doing GWAS"): t_stats, p_values = GWAS(pheno_vector, @@ -288,6 +297,7 @@ def run_other(pheno_vector, def matrixMult(A,B): # If there is no fblas then we will revert to np.dot() + try: linalg.fblas except AttributeError: @@ -315,7 +325,7 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) -def calculate_kinship(genotype_matrix, temp_data): +def calculate_kinship(genotype_matrix, temp_data, is_testing=False): """ genotype_matrix is an n x m matrix encoding SNP minor alleles. @@ -325,10 +335,12 @@ def calculate_kinship(genotype_matrix, temp_data): """ n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("n is:", n) - print("m is:", m) + print("genotype 2D matrix n (inds) is:", n) + print("genotype 2D matrix m (snps) is:", m) keep = [] for counter in range(m): + if is_testing and counter>8: + break #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) #Checks if any values in column are not numbers not_number = np.isnan(genotype_matrix[:,counter]) @@ -472,7 +484,7 @@ class LMM: is not done consistently. """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True): + def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True,is_testing=False): """ The constructor takes a phenotype vector or array of size n. @@ -481,7 +493,8 @@ class LMM: X0 is an optional covariate matrix of size n x q, where there are q covariates. When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - + + self.is_testing = True if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose @@ -501,14 +514,14 @@ class LMM: self.nonmissing = x print("this K is:", pf(K)) - + if len(Kva) == 0 or len(Kve) == 0: if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() Kva,Kve = linalg.eigh(K) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - + self.K = K self.Kva = Kva self.Kve = Kve @@ -708,24 +721,20 @@ class LMM: pl.ylabel("Probability of data") pl.title(title) -def main(): - parser = argparse.ArgumentParser(description='Run pyLMM') - parser.add_argument('-k', '--key') - parser.add_argument('-s', '--species') - - opts = parser.parse_args() - - key = opts.key - species = opts.species - + +def gn2_redis(key,species,is_testing=False): json_params = Redis.get(key) params = json.loads(json_params) - #print("params:", params) - - #print("kinship_matrix:", params['kinship_matrix']) tempdata = temp_data.TempData(params['temp_uuid']) + + print('kinship', np.array(params['kinship_matrix'])) + print('pheno', np.array(params['pheno_vector'])) + geno = np.array(params['genotype_matrix']) + print('geno', geno.shape, geno) + # sys.exit(1) + if species == "human" : ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), covariate_matrix = np.array(params['covariate_matrix']), @@ -735,10 +744,11 @@ def main(): tempdata = tempdata) else: ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = np.array(params['genotype_matrix']), + genotype_matrix = geno, restricted_max_likelihood = params['restricted_max_likelihood'], refit = params['refit'], - tempdata = tempdata) + tempdata = tempdata, + is_testing=is_testing) results_key = "pylmm:results:" + params['temp_uuid'] @@ -748,9 +758,46 @@ def main(): #Pushing json_results into a list where it is the only item because blpop needs a list Redis.rpush(results_key, json_results) Redis.expire(results_key, 60*60) + return ps, ts + +# This is the main function used by Genenetwork2 (with environment) +def gn2_main(): + parser = argparse.ArgumentParser(description='Run pyLMM') + parser.add_argument('-k', '--key') + parser.add_argument('-s', '--species') + + opts = parser.parse_args() + + key = opts.key + species = opts.species + + gn2_redis(key,species) + +def gn2_load_redis(key,species,kinship,pheno,geno,is_testing): + print("Loading Redis from parsed data") + params = dict(pheno_vector = pheno.tolist(), + genotype_matrix = geno.tolist(), + kinship_matrix= kinship.tolist(), + restricted_max_likelihood = True, + refit = False, + temp_uuid = "testrun_temp_uuid", + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + return gn2_redis(key,species,is_testing) + if __name__ == '__main__': - main() + print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") + if has_gn2: + gn2_main() + else: + print("Run from runlmm.py instead") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py new file mode 100644 index 00000000..abb72081 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py @@ -0,0 +1,55 @@ +import sys +import time +import numpy as np +from numpy.distutils.system_info import get_info; +from scipy import linalg +from scipy import optimize +from scipy import stats + +useNumpy = None +hasBLAS = None + +def matrix_initialize(useBLAS): + global useNumpy # module based variable + if useBLAS and useNumpy == None: + print get_info('blas_opt') + try: + linalg.fblas + sys.stderr.write("INFO: using linalg.fblas\n") + useNumpy = False + hasBLAS = True + except AttributeError: + sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n") + useNumpy = True + else: + sys.stderr.write("INFO: using numpy.dot\n") + useNumpy=True + +def matrixMult(A,B): + global useNumpy # module based variable + + if useNumpy: + return np.dot(A,B) + + # If the matrices are in Fortran order then the computations will be faster + # when using dgemm. Otherwise, the function will copy the matrix and that takes time. + if not A.flags['F_CONTIGUOUS']: + AA = A.T + transA = True + else: + AA = A + transA = False + + if not B.flags['F_CONTIGUOUS']: + BB = B.T + transB = True + else: + BB = B + transB = False + + return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) + +def matrixMultT(M): + # res = np.dot(W,W.T) + # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False) + return matrixMult(M,M.T) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py new file mode 100644 index 00000000..7bd2df91 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py @@ -0,0 +1,107 @@ +# Plink module +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# Some of the BED file parsing came from pylmm: +# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# According to the PLINK information + +# Parse a textual BIM file and return the contents as a list of tuples +# +# Extended variant information file accompanying a .bed binary genotype table. +# +# A text file with no header line, and one line per variant with the following six fields: +# +# Chromosome code (either an integer, or 'X'/'Y'/'XY'/'MT'; '0' indicates unknown) or name +# Variant identifier +# Position in morgans or centimorgans (safe to use dummy value of '0') +# Base-pair coordinate (normally 1-based, but 0 ok; limited to 231-2) +# Allele 1 (corresponding to clear bits in .bed; usually minor) +# Allele 2 (corresponding to set bits in .bed; usually major) +# +# Allele codes can contain more than one character. Variants with negative bp coordinates are ignored by PLINK. Example +# +# 1 mm37-1-3125499 0 3125499 1 2 +# 1 mm37-1-3125701 0 3125701 1 2 +# 1 mm37-1-3187481 0 3187481 1 2 + +import struct +# import numpy as np + +def readbim(fn): + res = [] + for line in open(fn): + list = line.split() + if len([True for e in list if e == 'nan']) == 0: + res.append( (list[0],list[1],int(list[2]),int(list[3]),int(list[4]),int(list[5])) ) + else: + res.append( (list[0],list[1],list[2],float('nan'),float('nan'),float('nan')) ) + return res + +# .bed (PLINK binary biallelic genotype table) +# +# Primary representation of genotype calls at biallelic variants. Must +# be accompanied by .bim and .fam files. Basically contains num SNP +# blocks containing IND (compressed 4 IND into a byte) +# +# Since it is a biallelic format it supports for every individual +# whether the first allele is homozygous (b00), the second allele is +# homozygous (b11), it is heterozygous (b10) or that it is missing +# (b01). + +# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bed +# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam +# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim + +def readbed(fn,inds,encoding,func=None): + + # For every SNP block fetch the individual genotypes using values + # 0.0 and 1.0 for homozygous and 0.5 for heterozygous alleles + def fetchGenotypes(X): + # D = { \ + # '00': 0.0, \ + # '10': 0.5, \ + # '11': 1.0, \ + # '01': float('nan') \ + # } + + Didx = { '00': 0, '10': 1, '11': 2, '01': 3 } + G = [] + for x in X: + if not len(x) == 10: + xx = x[2:] + x = '0b' + '0'*(8 - len(xx)) + xx + a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) + L = [encoding[Didx[y]] for y in [a,b,c,d]] + G += L + G = G[:inds] + # G = np.array(G) + return G + + bytes = inds / 4 + (inds % 4 and 1 or 0) + format = 'c'*bytes + count = 0 + with open(fn,'rb') as f: + magic = f.read(3) + assert( ":".join("{:02x}".format(ord(c)) for c in magic) == "6c:1b:01") + while True: + count += 1 + X = f.read(bytes) + if not X: + return(count-1) + XX = [bin(ord(x)) for x in struct.unpack(format,X)] + xs = fetchGenotypes(XX) + func(count,xs) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py index e47c18e1..e47c18e1 100755..100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index ff53b4ea..738268be 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -19,32 +19,40 @@ from optparse import OptionParser import sys -import os +import tsvreader import numpy as np -# from lmm import LMM, run_other -import csv - +from lmm import gn2_load_redis usage = """ python runlmm.py [options] command - runlmm.py processing multiplexer reads standard input types and calls the routines + runlmm.py processing multiplexer reads standardised input formats + and calls the different routines Current commands are: parse : only parse input files + redis : use Redis to call into GN2 try --help for more information """ + parser = OptionParser(usage=usage) # parser.add_option("-f", "--file", dest="input file", # help="In", metavar="FILE") parser.add_option("--kinship",dest="kinship", - help="Kinship file format") + help="Kinship file format 1.0") +parser.add_option("--pheno",dest="pheno", + help="Phenotype file format 1.0") +parser.add_option("--geno",dest="geno", + help="Genotype file format 1.0") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") +parser.add_option("--test", + action="store_true", dest="testing", default=False, + help="Testing mode") (options, args) = parser.parse_args() @@ -55,22 +63,58 @@ if len(args) != 1: cmd = args[0] print "Command: ",cmd - if options.kinship: - K1 = [] - print options.kinship - with open(options.kinship,'r') as tsvin: - if tsvin.readline().strip() != "# Kinship format version 1.0": - raise Exception("Expecting Kinship format version 1.0") - tsvin.readline() - tsvin.readline() - tsv = csv.reader(tsvin, delimiter='\t') - for row in tsv: - ns = np.genfromtxt(row[1:]) - K1.append(ns) # <--- slow - K = np.array(K1) - -if cmd == 'parse': - pass -else: - raise Exception("Unknown command") + k = tsvreader.kinship(options.kinship) + print k.shape + +if options.pheno: + y = tsvreader.pheno(options.pheno) + print y.shape + +if options.geno: + g = tsvreader.geno(options.geno) + print g.shape + +def normalizeGenotype(G): + # Run for every SNP list (num individuals) + x = True - np.isnan(G) # Matrix of True/False + m = G[x].mean() # Global mean value + # print m + s = np.sqrt(G[x].var()) # Global stddev + # print s + G[np.isnan(G)] = m # Plug-in mean values for missing + if s == 0: + G = G - m # Subtract the mean + else: + G = (G - m) / s # Normalize the deviation + return G + +# g = g.reshape((1, -1))[0] +print("Before",g) +gn = [] +for snp in g: + gn.append( normalizeGenotype(snp) ) + +gn = g +gn = np.array(gn) +print("After1",gn) +gnT = gn.T +print("After",gnT) +# G = gnT +G = gnT +print "G shape",G.shape +# sys.exit(1) +# assert(G[0,0]==-2.25726341) + +# Remove individuals with missing phenotypes +v = np.isnan(y) +keep = True - v +if v.sum(): + if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum())) + y = y[keep] + G = G[keep,:] + k = k[keep,:][:,keep] + +if cmd == 'redis': + ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing) + print np.array(ps) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py new file mode 100644 index 00000000..b4027fa3 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -0,0 +1,76 @@ +# Standard file readers +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import sys +import os +import numpy as np +import csv + +def kinship(fn): + K1 = [] + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Kinship format version 1.0") + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + ns = np.genfromtxt(row[1:]) + K1.append(ns) # <--- slow + K = np.array(K1) + return K + +def pheno(fn): + Y1 = [] + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Phenotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + ns = np.genfromtxt(row[1:]) + Y1.append(ns) # <--- slow + Y = np.array(Y1) + return Y + +def geno(fn): + G1 = [] + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + # print(row) + id = row[0] + gs = list(row[1]) + # print id,gs + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + # print id,gs2 + # ns = np.genfromtxt(row[1:]) + G1.append(gs2) # <--- slow + G = np.array(G1) + return G + diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/scripts/example.py index 8b30debd..8b30debd 100755 --- a/wqflask/wqflask/my_pylmm/example.py +++ b/wqflask/wqflask/my_pylmm/scripts/example.py diff --git a/wqflask/wqflask/my_pylmm/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py index 74b58dd6..74b58dd6 100755 --- a/wqflask/wqflask/my_pylmm/pylmmGWAS.py +++ b/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py diff --git a/wqflask/wqflask/my_pylmm/pylmmKinship.py b/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py index c9a73ece..c9a73ece 100755 --- a/wqflask/wqflask/my_pylmm/pylmmKinship.py +++ b/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py diff --git a/wqflask/wqflask/my_pylmm/run_pylmm.py b/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py index 0c96d986..0c96d986 100755 --- a/wqflask/wqflask/my_pylmm/run_pylmm.py +++ b/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py diff --git a/wqflask/wqflask/pylmm b/wqflask/wqflask/pylmm deleted file mode 160000 -Subproject cede848b7ce648366c1bdd7bc5df43c633eeb0d |