From d85ca24cfa021785504ad1d84b712880c8dfbcd1 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Feb 2015 14:25:15 +0300
Subject: Split out behaviour for calling lmm from GN2 or (testing) CLI

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 369fcbcc..8de3b3a7 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -45,11 +45,16 @@ import sys
 sys.path.append("/home/zas1024/gene/wqflask/")
 print("sys.path2:", sys.path)
 
-from utility.benchmark import Bench
-from utility import temp_data
-
-from wqflask.my_pylmm.pyLMM import chunks
+has_gn2=True
 
+try:
+    from utility.benchmark import Bench
+    from utility import temp_data
+    from wqflask.my_pylmm.pyLMM import chunks
+except ImportError:
+    print("WARNING: Standalone version\n")
+    has_gn2=False
+    pass
 
 #np.seterr('raise')
 
@@ -288,6 +293,7 @@ def run_other(pheno_vector,
 def matrixMult(A,B):
 
     # If there is no fblas then we will revert to np.dot()
+
     try:
         linalg.fblas
     except AttributeError:
@@ -708,7 +714,7 @@ class LMM:
        pl.ylabel("Probability of data")
        pl.title(title)
 
-def main():
+def gn2_main():
     parser = argparse.ArgumentParser(description='Run pyLMM')
     parser.add_argument('-k', '--key')
     parser.add_argument('-s', '--species')
@@ -750,7 +756,10 @@ def main():
     Redis.expire(results_key, 60*60)
 
 if __name__ == '__main__':
-    main()
+    if has_gn2:
+        gn2_main()
+    else:
+        cli_main()
 
 
-- 
cgit v1.2.3


From 2a2021e491b7c4aceebec95e4d184c98187d7201 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Feb 2015 14:27:55 +0300
Subject: Remove empty dir

---
 wqflask/wqflask/pylmm | 1 -
 1 file changed, 1 deletion(-)
 delete mode 160000 wqflask/wqflask/pylmm

(limited to 'wqflask')

diff --git a/wqflask/wqflask/pylmm b/wqflask/wqflask/pylmm
deleted file mode 160000
index cede848b..00000000
--- a/wqflask/wqflask/pylmm
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit cede848b7ce648366c1bdd7bc5df43c633eeb0d7
-- 
cgit v1.2.3


From 61b8ae229687612b4c9d4a0b089a9ef0ef47ff2c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Feb 2015 14:40:55 +0300
Subject: Moved pylmm scripts that are not used by GN2

---
 wqflask/wqflask/my_pylmm/example.py              |  58 ----
 wqflask/wqflask/my_pylmm/pylmmGWAS.py            | 346 -----------------------
 wqflask/wqflask/my_pylmm/pylmmKinship.py         | 186 ------------
 wqflask/wqflask/my_pylmm/run_pylmm.py            |  77 -----
 wqflask/wqflask/my_pylmm/scripts/example.py      |  58 ++++
 wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py    | 346 +++++++++++++++++++++++
 wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py | 186 ++++++++++++
 wqflask/wqflask/my_pylmm/scripts/run_pylmm.py    |  77 +++++
 8 files changed, 667 insertions(+), 667 deletions(-)
 delete mode 100755 wqflask/wqflask/my_pylmm/example.py
 delete mode 100755 wqflask/wqflask/my_pylmm/pylmmGWAS.py
 delete mode 100755 wqflask/wqflask/my_pylmm/pylmmKinship.py
 delete mode 100755 wqflask/wqflask/my_pylmm/run_pylmm.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/example.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/run_pylmm.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py
deleted file mode 100755
index 8b30debd..00000000
--- a/wqflask/wqflask/my_pylmm/example.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python
-
-from __future__ import absolute_import, print_function, division
-
-import sys
-import time
-
-import numpy as np
-from pyLMM import lmm
-
-from pprint import pformat as pf
-
-
-Y = np.genfromtxt('data/mdp.exprs.1.new')
-print("exprs is:", pf(Y.shape))
-
-# Loading npdump and first 1000 snps for speed
-#K = np.load('data/hmdp.liver.K.npdump')
-#snps = np.load('data/hmdp.liver.snps.1000.npdump').T
-
-# These three lines will load all SNPs (from npdump or from txt) and 
-# calculate the kinship
-snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T
-print("snps is:", pf(snps.shape))
-#snps = snps[~np.isnan(snps).all(axis=1)]
-#print ("snps is now:", pf(snps))
-np.savetxt("/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.trimmed", snps, fmt='%s', delimiter=' ')
-#snps = np.load('data/hmdp.liver.snps.npdump').T
-K = lmm.calculateKinship(snps)
-#print("K is:", pf(K))
-#print("Y is:", pf(Y.shape))
-
-# Instantiate a LMM object for the phentoype Y and fit the null model
-L = lmm.LMM(Y,K)
-L.fit()
-
-# Manually calculate the association at one SNP
-X = snps[:,0]
-X[np.isnan(X)] = X[True - np.isnan(X)].mean() # Fill missing with MAF
-X = X.reshape(len(X),1)
-if X.var() == 0: ts,ps = (np.nan,np.nan)
-else: ts,ps = L.association(X)
-
-# If I want to refit the variance component
-L.fit(X=X)
-ts,ps = L.association(X)
-
-# If I want to do a genome-wide scan over the 1000 SNPs.
-# This call will use REML (REML = False means use ML).
-# It will also refit the variance components for each SNP.
-# Setting refit = False will cause the program to fit the model once
-# and hold those variance component estimates for each SNP.
-begin = time.time()
-TS,PS = lmm.GWAS(Y,snps,K,REML=True,refit=False)
-print("TS is:", pf(TS))
-print("PS is:", pf(PS))
-end = time.time()
-sys.stderr.write("Total time for 1000 SNPs: %0.3f\n" % (end- begin))
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/pylmmGWAS.py
deleted file mode 100755
index 74b58dd6..00000000
--- a/wqflask/wqflask/my_pylmm/pylmmGWAS.py
+++ /dev/null
@@ -1,346 +0,0 @@
-#!/usr/bin/python
-
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
-
-#The program is free for academic use. Please contact Nick Furlotte
-#<nick.furlotte@gmail.com> if you are interested in using the software for
-#commercial purposes.
-
-#The software must not be modified and distributed without prior
-#permission of the author.
-
-#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import pdb
-import time
-import sys
-
-def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
-def formatResult(id,beta,betaSD,ts,ps):
-   return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
-   
-def outputResult(id,beta,betaSD,ts,ps):
-   out.write(formatResult(id,beta,betaSD,ts,ps))
-
-from optparse import OptionParser,OptionGroup
-usage = """usage: %prog [options] --kfile kinshipFile --[tfile | bfile] plinkFileBase outfileBase
-
-This program provides basic genome-wide association (GWAS) functionality.  You provide a phenotype and genotype file as well as a pre-computed (use pylmmKinship.py) kinship matrix and the program outputs a result file with information about each SNP, including the association p-value.  
-The input file are all standard plink formatted with the first two columns specifiying the individual and family ID.  For the phenotype file, we accept either NA or -9 to denote missing values.  
-
-Basic usage:
-
-      python pylmmGWAS.py -v --bfile plinkFile --kfile preComputedKinship.kin --phenofile plinkFormattedPhenotypeFile resultFile
-
-	    """
-parser = OptionParser(usage=usage)
-
-basicGroup = OptionGroup(parser, "Basic Options")
-advancedGroup = OptionGroup(parser, "Advanced Options")
-experimentalGroup = OptionGroup(parser, "Experimental Options")
-
-#basicGroup.add_option("--pfile", dest="pfile",
-#                  help="The base for a PLINK ped file")
-basicGroup.add_option("--tfile", dest="tfile",
-                  help="The base for a PLINK tped file")
-basicGroup.add_option("--bfile", dest="bfile",
-                  help="The base for a PLINK binary bed file")
-basicGroup.add_option("--phenofile", dest="phenoFile", default=None,
-                  help="Without this argument the program will look for a file with .pheno that has the plinkFileBase root.  If you want to specify an alternative phenotype file, then use this argument.  This file should be in plink format. ")
-
-# EMMA Options
-basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the columns and snps on the rows.")
-basicGroup.add_option("--emmaPHENO", dest="emmaPheno", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each phenotype as one row.")
-basicGroup.add_option("--emmaCOV", dest="emmaCov", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each covariate as one row.")
-
-basicGroup.add_option("--kfile", dest="kfile",
-                  help="The location of a kinship file.  This is an nxn plain text file and can be computed with the pylmmKinship program.")
-basicGroup.add_option("--covfile", dest="covfile",
-                  help="The location of a covariate file file.  This is a plink formatted covariate file.")
-basicGroup.add_option("-p", type="int", dest="pheno", help="The phenotype index to be used in association.", default=0)
-
-
-
-advancedGroup.add_option("--removeMissingGenotypes",
-                  action="store_false", dest="normalizeGenotype", default=True,
-                  help="By default the program replaces missing genotypes with the minor allele frequency.  This option overrides that behavior making the program remove missing individuals.  NOTE: This can increase running time due to the need to recompute the eigendecomposition for each SNP with missing values.")
-advancedGroup.add_option("--refit",
-                  action="store_true", dest="refit", default=False,
-                  help="Refit the variance components at each SNP (default is to lock in the variance components under the null).")
-
-advancedGroup.add_option("--REML",
-                  action="store_true", dest="REML", default=False,
-                  help="Use restricted maximum-likelihood (REML) (default is maximum-likelihood).")
-#advancedGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
-advancedGroup.add_option("--eigen", dest="eigenfile",
-                  help="The location of the precomputed eigendecomposition for the kinship file.  These can be computed with pylmmKinship.py.")
-advancedGroup.add_option("--noMean", dest="noMean", default=False,action="store_true",
-                  help="This option only applies when --cofile is used.  When covfile is provided, the program will automatically add a global mean covariate to the model unless this option is specified.")
-
-basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
-
-advancedGroup.add_option("-v", "--verbose",
-                  action="store_true", dest="verbose", default=False,
-                  help="Print extra info")
-
-# Experimental Group Options
-experimentalGroup.add_option("--kfile2", dest="kfile2", 
-                  help="The location of a second kinship file.  This file has the same format as the first kinship.  This might be used if you want to correct for another form of confounding.")
-
-parser.add_option_group(basicGroup)
-parser.add_option_group(advancedGroup)
-parser.add_option_group(experimentalGroup)
-
-(options, args) = parser.parse_args()
-
-import sys
-import os
-import numpy as np
-from scipy import linalg
-from pylmm.lmm import LMM
-from pylmm import input
-
-import multiprocessing as mp # Multiprocessing is part of the Python stdlib
-import Queue 
-
-if len(args) != 1:  
-   parser.print_help()
-   sys.exit()
-
-outFile = args[0]
-
-if not options.tfile and not options.bfile and not options.emmaFile: 
-#if not options.pfile and not options.tfile and not options.bfile: 
-   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an EMMA formatted file (--emmaSNP).")
-if not options.kfile:
-   parser.error("Please provide a pre-computed kinship file")
-
-numThreads = None
-if options.numThreads:
-   numThreads = int(options.numThreads)
-
-# READING PLINK input
-if options.verbose: sys.stderr.write("Reading SNP input...\n")
-if options.bfile: IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-elif options.tfile: IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-#elif options.pfile: IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-elif options.emmaFile: IN = input.plink(options.emmaFile,type='emma', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-else: parser.error("You must provide at least one PLINK input file base")
-
-if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos') and not os.path.isfile(options.emmaPheno):
-   parser.error("No .pheno file exist for %s.  Please provide a phenotype file using the --phenofile or --emmaPHENO argument." % (options.phenoFile or IN.fbase + '.phenos'))
-
-# Read the emma phenotype file if provided.
-# Format should be rows are phenotypes and columns are individuals.
-if options.emmaPheno:
-   f = open(options.emmaPheno,'r')
-   P = []
-   for line in f:
-      v = line.strip().split()
-      p = []
-      for x in v:
-	 try:
-	    p.append(float(x))
-	 except: p.append(np.nan)
-      P.append(p)
-   f.close()
-   IN.phenos = np.array(P).T
-
-# READING Covariate File
-if options.covfile: 
-   if options.verbose: 
-      sys.stderr.write("Reading covariate file...\n")
-   P = IN.getCovariates(options.covfile) 
-   if options.noMean: 
-      X0 = P
-   else: 
-      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
-elif options.emmaCov:
-   if options.verbose: 
-      sys.stderr.write("Reading covariate file...\n")
-   P = IN.getCovariatesEMMA(options.emmaCov) 
-   if options.noMean: 
-      X0 = P
-   else: 
-      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
-else: 
-   X0 = np.ones((IN.phenos.shape[0],1))
-
-if np.isnan(X0).sum(): 
-      parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case.  Either remove those individuals with missing values or replace them in some way.")
-
-# READING Kinship - major bottleneck for large datasets
-if options.verbose: sys.stderr.write("Reading kinship...\n")
-begin = time.time()
-# This method seems to be the fastest and works if you already know the size of the matrix
-if options.kfile[-3:] == '.gz':
-   import gzip
-   f = gzip.open(options.kfile,'r')
-   F = f.read() # might exhaust mem if the file is huge
-   K = np.fromstring(F,sep=' ') # Assume that space separated
-   f.close()
-else: 
-   K = np.fromfile(open(options.kfile,'r'),sep=" ")
-K.resize((len(IN.indivs),len(IN.indivs)))
-end = time.time()
-# Other slower ways
-#K = np.loadtxt(options.kfile)
-#K = np.genfromtxt(options.kfile)
-if options.verbose: sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin))
-
-if options.kfile2:
-   if options.verbose: sys.stderr.write("Reading second kinship...\n")
-   begin = time.time()
-   # This method seems to be the fastest and works if you already know the size of the matrix
-   if options.kfile2[-3:] == '.gz':
-      import gzip
-      f = gzip.open(options.kfile2,'r')
-      F = f.read() # might exhaust mem if the file is huge
-      K2 = np.fromstring(F,sep=' ') # Assume that space separated
-      f.close()
-   else: K2 = np.fromfile(open(options.kfile2,'r'),sep=" ")
-   K2.resize((len(IN.indivs),len(IN.indivs)))
-   end = time.time()
-   if options.verbose: sys.stderr.write("Read the %d x %d second kinship matrix in %0.3fs \n" % (K2.shape[0],K2.shape[1],end-begin))
-
-# PROCESS the phenotype data -- Remove missing phenotype values
-# Keep will now index into the "full" data to select what we keep (either everything or a subset of non missing data
-Y = IN.phenos[:,options.pheno]
-v = np.isnan(Y)
-keep = True - v
-if v.sum():
-   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
-   Y = Y[keep]
-   X0 = X0[keep,:]
-   K = K[keep,:][:,keep]
-   if options.kfile2: K2 = K2[keep,:][:,keep]
-   Kva = []
-   Kve = []
-
-# Only load the decomposition if we did not remove individuals.
-# Otherwise it would not be correct and we would have to compute it again.
-if not v.sum() and options.eigenfile:
-   if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
-   Kva = np.load(options.eigenfile + ".Kva")
-   Kve = np.load(options.eigenfile + ".Kve")
-else: 
-   Kva = []
-   Kve = []
-
-# CREATE LMM object for association
-n = K.shape[0]
-if not options.kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
-else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2)
-
-# Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
-if not options.refit: 
-   if options.verbose: sys.stderr.write("Computing fit for null model\n")
-   L.fit()
-   if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
-   if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW))
-
-def compute_snp(collect):
-   snp = collect[0]
-   id = collect[1]
-   # result = []
-   # Check SNPs for missing values
-   x = snp[keep].reshape((n,1))  # all the SNPs
-   v = np.isnan(x).reshape((-1,))
-   if v.sum():
-      keeps = True - v
-      xs = x[keeps,:]
-      if keeps.sum() <= 1 or xs.var() <= 1e-6: 
-         # PS.append(np.nan)
-         # TS.append(np.nan)
-         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
-         # continue
-         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
-
-      # Its ok to center the genotype -  I used options.normalizeGenotype to 
-      # force the removal of missing genotypes as opposed to replacing them with MAF.
-      if not options.normalizeGenotype:
-         xs = (xs - xs.mean()) / np.sqrt(xs.var())
-      Ys = Y[keeps]
-      X0s = X0[keeps,:]
-      Ks = K[keeps,:][:,keeps]
-      if options.kfile2:
-         K2s = K2[keeps,:][:,keeps]
-         Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=options.verbose,K2=K2s)
-      else:
-         Ls = LMM(Ys,Ks,X0=X0s,verbose=options.verbose)
-      if options.refit:
-        Ls.fit(X=xs,REML=options.REML)
-      else:
-         #try:
-         Ls.fit(REML=options.REML)
-         #except: pdb.set_trace()
-      ts,ps,beta,betaVar = Ls.association(xs,REML=options.REML,returnBeta=True)
-   else:
-      if x.var() == 0: 
-         # PS.append(np.nan)
-         # TS.append(np.nan)
-         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
-         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
-         # continue
-
-      if options.refit:
-         L.fit(X=x,REML=options.REML)
-      # This is where it happens
-      ts,ps,beta,betaVar = L.association(x,REML=options.REML,returnBeta=True)
-   
-   return formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)
-   # PS.append(ps)
-   # TS.append(ts)
-   # return len(result)
-   # compute.q.put(result)
-   # return None
-
-def f_init(q):
-   compute_snp.q = q
-
-# Set up the pool
-# mp.set_start_method('spawn')
-q = mp.Queue()
-p = mp.Pool(numThreads, f_init, [q])
-collect = []
-   
-# Buffers for pvalues and t-stats
-# PS = []
-# TS = []
-count = 0
-out = open(outFile,'w')
-printOutHead()
-
-for snp_id in IN:
-   count += 1
-   if count % 1000 == 0:
-      if options.verbose:
-         sys.stderr.write("At SNP %d\n" % count)
-      # if count>8000 :
-      #    break         # for testing only
-   if count % 100 == 0:
-      for line in p.imap(compute_snp,collect):
-         out.write(line)
-      collect = []
-      
-   collect.append(snp_id)
-for line in p.imap(compute_snp,collect):
-   out.write(line)
-
-
diff --git a/wqflask/wqflask/my_pylmm/pylmmKinship.py b/wqflask/wqflask/my_pylmm/pylmmKinship.py
deleted file mode 100755
index c9a73ece..00000000
--- a/wqflask/wqflask/my_pylmm/pylmmKinship.py
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/python
-
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
-
-#The program is free for academic use. Please contact Nick Furlotte
-#<nick.furlotte@gmail.com> if you are interested in using the software for
-#commercial purposes.
-
-#The software must not be modified and distributed without prior
-#permission of the author.
-
-#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import sys
-import pdb
-
-from optparse import OptionParser,OptionGroup
-usage = """usage: %prog [options] --[tfile | bfile] plinkFileBase outfile
-
-"""
-
-parser = OptionParser(usage=usage)
-
-basicGroup = OptionGroup(parser, "Basic Options")
-#advancedGroup = OptionGroup(parser, "Advanced Options")
-
-#basicGroup.add_option("--pfile", dest="pfile",
-#                  help="The base for a PLINK ped file")
-basicGroup.add_option("--tfile", dest="tfile",
-                  help="The base for a PLINK tped file")
-basicGroup.add_option("--bfile", dest="bfile",
-                  help="The base for a PLINK binary ped file")
-basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the rows and snps on the columns.")
-basicGroup.add_option("--emmaNumSNPs", dest="numSNPs", type="int", default=0,
-		     help="When providing the emmaSNP file you need to specify how many snps are in the file")
-
-basicGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
-basicGroup.add_option("-n", default=1000,dest="computeSize", type="int", help="The maximum number of SNPs to read into memory at once (default 1000).  This is important when there is a large number of SNPs, because memory could be an issue.")
-basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
-
-basicGroup.add_option("-v", "--verbose",
-                  action="store_true", dest="verbose", default=False,
-                  help="Print extra info")
-
-parser.add_option_group(basicGroup)
-#parser.add_option_group(advancedGroup)
-
-(options, args) = parser.parse_args()
-if len(args) != 1: 
-   parser.print_help()
-   sys.exit()
-
-outFile = args[0]
-
-import sys
-import os
-import numpy as np
-from scipy import linalg
-from pylmm.lmm import calculateKinship
-from pylmm import input
-import multiprocessing as mp # Multiprocessing is part of the Python stdlib
-import Queue 
-
-if not options.tfile and not options.bfile and not options.emmaFile: 
-   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
-
-numThreads = None
-if options.numThreads:
-   numThreads = int(options.numThreads)
-   
-if options.verbose: sys.stderr.write("Reading PLINK input...\n")
-if options.bfile: IN = input.plink(options.bfile,type='b')
-elif options.tfile: IN = input.plink(options.tfile,type='t')
-#elif options.pfile: IN = input.plink(options.pfile,type='p')
-elif options.emmaFile: 
-   if not options.numSNPs: parser.error("You must provide the number of SNPs when specifying an emma formatted file.")
-   IN = input.plink(options.emmaFile,type='emma')
-else: parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
-
-def compute_W(job):
-   """
-   Read 1000 SNPs at a time into matrix and return the result
-   """
-   n = len(IN.indivs)
-   m = options.computeSize
-   W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
-   for j in range(0,options.computeSize):
-      row = job*m + j
-      if row >= IN.numSNPs:
-         W = W[:,range(0,j)]
-         break
-      snp,id = IN.next()
-      if snp.var() == 0:
-         continue
-      W[:,j] = snp  # set row to list of SNPs
-   return W
-
-def compute_dgemm(job,W):
-   """
-   Compute Kinship(W)*j
-
-   For every set of SNPs dgemm is used to multiply matrices T(W)*W
-   """
-   res = None
-   try:
-      res = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False)
-   except AttributeError:
-      res = np.dot(W,W.T) 
-   compute_dgemm.q.put([job,res])
-   return job
-
-def f_init(q):
-    compute_dgemm.q = q
-
-n = len(IN.indivs)
-# m = options.computeSize
-# jobsize=m
-
-IN.getSNPIterator()
-# Annoying hack to get around the fact that it is expensive to determine the number of SNPs in an emma file
-if options.emmaFile: IN.numSNPs = options.numSNPs
-# i = 0
-
-# mp.set_start_method('spawn')
-q = mp.Queue()
-p = mp.Pool(numThreads, f_init, [q])
-iterations = IN.numSNPs/options.computeSize+1
-# jobs = range(0,8) # range(0,iterations)
-
-results = []
-
-K = np.zeros((n,n))  # The Kinship matrix has dimension individuals x individuals
-
-completed = 0
-
-# for job in range(8):
-for job in range(iterations):
-   if options.verbose:
-      sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
-   W = compute_W(job)
-   results.append(p.apply_async(compute_dgemm, (job,W)))   
-   # Do we have a result?
-   try: 
-     j,x = q.get_nowait()
-     if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
-     K_j = x
-     # print j,K_j[:,0]
-     K = K + K_j
-     completed += 1
-   except Queue.Empty:
-     pass
-
-for job in range(len(results)-completed):
-   j,x = q.get()
-   if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
-   K_j = x
-   # print j,K_j[:,0]
-   K = K + K_j
-        
-K = K / float(IN.numSNPs)
-if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
-np.savetxt(outFile,K)
-
-if options.saveEig:
-   if options.verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
-   Kva,Kve = linalg.eigh(K)
-   if options.verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
-   np.savetxt(outFile+".kva",Kva)
-   np.savetxt(outFile+".kve",Kve)
-      
-
-
-
diff --git a/wqflask/wqflask/my_pylmm/run_pylmm.py b/wqflask/wqflask/my_pylmm/run_pylmm.py
deleted file mode 100755
index 0c96d986..00000000
--- a/wqflask/wqflask/my_pylmm/run_pylmm.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-from base import data_set
-from base.species import TheSpecies
-    
-    def run(dataset_name, vals, temp_uuid):
-        """Generates p-values for each marker"""
-
-        tempdata = temp_data.TempData(temp_uuid)
-        
-        dataset = data_set.create_dataset(dataset_name)
-        species = TheSpecies(dataset=dataset)
-
-        samples = [] # Want only ones with values
-        vals = vals
-
-        for sample in dataset.group.samplelist:
-            samples.append(str(sample))
-            
-        gen_data(dataset, vals, tempdata)
-
-
-    def gen_data(dataset, vals)
-        dataset.group.get_markers()
-
-        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in vals])
-
-        if dataset.group.species == "human":
-            p_values, t_stats = gen_human_results(pheno_vector, tempdata)
-        else:
-            genotype_data = [marker['genotypes'] for marker in dataset.group.markers.markers]
-            
-            no_val_samples = self.identify_empty_samples()
-            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
-            
-            genotype_matrix = np.array(trimmed_genotype_data).T
-            
-            #print("pheno_vector: ", pf(pheno_vector))
-            #print("genotype_matrix: ", pf(genotype_matrix))
-            #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
-            
-            t_stats, p_values = lmm.run(
-                pheno_vector,
-                genotype_matrix,
-                restricted_max_likelihood=True,
-                refit=False,
-                temp_data=tempdata
-            )
-            #print("p_values:", p_values)
-        
-        self.dataset.group.markers.add_pvalues(p_values)
-        return self.dataset.group.markers.markers
-
-
-    def gen_human_results(self, pheno_vector, tempdata):
-        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
-
-        plink_input = input.plink(file_base, type='b')
-        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
-
-        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
-        covariate_matrix = np.ones((pheno_vector.shape[0],1))
-        kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
-        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
-
-        p_values, t_stats = lmm.run_human(
-                pheno_vector,
-                covariate_matrix,
-                input_file_name,
-                kinship_matrix,
-                loading_progress=tempdata
-            )
-
-        return p_values, t_stats
-    
-if __name__ == '__main__':
-    run(dataset_name, vals, temp_uuid)
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/scripts/example.py b/wqflask/wqflask/my_pylmm/scripts/example.py
new file mode 100755
index 00000000..8b30debd
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/example.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+from __future__ import absolute_import, print_function, division
+
+import sys
+import time
+
+import numpy as np
+from pyLMM import lmm
+
+from pprint import pformat as pf
+
+
+Y = np.genfromtxt('data/mdp.exprs.1.new')
+print("exprs is:", pf(Y.shape))
+
+# Loading npdump and first 1000 snps for speed
+#K = np.load('data/hmdp.liver.K.npdump')
+#snps = np.load('data/hmdp.liver.snps.1000.npdump').T
+
+# These three lines will load all SNPs (from npdump or from txt) and 
+# calculate the kinship
+snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T
+print("snps is:", pf(snps.shape))
+#snps = snps[~np.isnan(snps).all(axis=1)]
+#print ("snps is now:", pf(snps))
+np.savetxt("/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.trimmed", snps, fmt='%s', delimiter=' ')
+#snps = np.load('data/hmdp.liver.snps.npdump').T
+K = lmm.calculateKinship(snps)
+#print("K is:", pf(K))
+#print("Y is:", pf(Y.shape))
+
+# Instantiate a LMM object for the phentoype Y and fit the null model
+L = lmm.LMM(Y,K)
+L.fit()
+
+# Manually calculate the association at one SNP
+X = snps[:,0]
+X[np.isnan(X)] = X[True - np.isnan(X)].mean() # Fill missing with MAF
+X = X.reshape(len(X),1)
+if X.var() == 0: ts,ps = (np.nan,np.nan)
+else: ts,ps = L.association(X)
+
+# If I want to refit the variance component
+L.fit(X=X)
+ts,ps = L.association(X)
+
+# If I want to do a genome-wide scan over the 1000 SNPs.
+# This call will use REML (REML = False means use ML).
+# It will also refit the variance components for each SNP.
+# Setting refit = False will cause the program to fit the model once
+# and hold those variance component estimates for each SNP.
+begin = time.time()
+TS,PS = lmm.GWAS(Y,snps,K,REML=True,refit=False)
+print("TS is:", pf(TS))
+print("PS is:", pf(PS))
+end = time.time()
+sys.stderr.write("Total time for 1000 SNPs: %0.3f\n" % (end- begin))
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py
new file mode 100755
index 00000000..74b58dd6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py
@@ -0,0 +1,346 @@
+#!/usr/bin/python
+
+# pylmm is a python-based linear mixed-model solver with applications to GWAS
+
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+
+#The program is free for academic use. Please contact Nick Furlotte
+#<nick.furlotte@gmail.com> if you are interested in using the software for
+#commercial purposes.
+
+#The software must not be modified and distributed without prior
+#permission of the author.
+
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pdb
+import time
+import sys
+
+def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
+
+def formatResult(id,beta,betaSD,ts,ps):
+   return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
+   
+def outputResult(id,beta,betaSD,ts,ps):
+   out.write(formatResult(id,beta,betaSD,ts,ps))
+
+from optparse import OptionParser,OptionGroup
+usage = """usage: %prog [options] --kfile kinshipFile --[tfile | bfile] plinkFileBase outfileBase
+
+This program provides basic genome-wide association (GWAS) functionality.  You provide a phenotype and genotype file as well as a pre-computed (use pylmmKinship.py) kinship matrix and the program outputs a result file with information about each SNP, including the association p-value.  
+The input file are all standard plink formatted with the first two columns specifiying the individual and family ID.  For the phenotype file, we accept either NA or -9 to denote missing values.  
+
+Basic usage:
+
+      python pylmmGWAS.py -v --bfile plinkFile --kfile preComputedKinship.kin --phenofile plinkFormattedPhenotypeFile resultFile
+
+	    """
+parser = OptionParser(usage=usage)
+
+basicGroup = OptionGroup(parser, "Basic Options")
+advancedGroup = OptionGroup(parser, "Advanced Options")
+experimentalGroup = OptionGroup(parser, "Experimental Options")
+
+#basicGroup.add_option("--pfile", dest="pfile",
+#                  help="The base for a PLINK ped file")
+basicGroup.add_option("--tfile", dest="tfile",
+                  help="The base for a PLINK tped file")
+basicGroup.add_option("--bfile", dest="bfile",
+                  help="The base for a PLINK binary bed file")
+basicGroup.add_option("--phenofile", dest="phenoFile", default=None,
+                  help="Without this argument the program will look for a file with .pheno that has the plinkFileBase root.  If you want to specify an alternative phenotype file, then use this argument.  This file should be in plink format. ")
+
+# EMMA Options
+basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the columns and snps on the rows.")
+basicGroup.add_option("--emmaPHENO", dest="emmaPheno", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each phenotype as one row.")
+basicGroup.add_option("--emmaCOV", dest="emmaCov", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each covariate as one row.")
+
+basicGroup.add_option("--kfile", dest="kfile",
+                  help="The location of a kinship file.  This is an nxn plain text file and can be computed with the pylmmKinship program.")
+basicGroup.add_option("--covfile", dest="covfile",
+                  help="The location of a covariate file file.  This is a plink formatted covariate file.")
+basicGroup.add_option("-p", type="int", dest="pheno", help="The phenotype index to be used in association.", default=0)
+
+
+
+advancedGroup.add_option("--removeMissingGenotypes",
+                  action="store_false", dest="normalizeGenotype", default=True,
+                  help="By default the program replaces missing genotypes with the minor allele frequency.  This option overrides that behavior making the program remove missing individuals.  NOTE: This can increase running time due to the need to recompute the eigendecomposition for each SNP with missing values.")
+advancedGroup.add_option("--refit",
+                  action="store_true", dest="refit", default=False,
+                  help="Refit the variance components at each SNP (default is to lock in the variance components under the null).")
+
+advancedGroup.add_option("--REML",
+                  action="store_true", dest="REML", default=False,
+                  help="Use restricted maximum-likelihood (REML) (default is maximum-likelihood).")
+#advancedGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
+advancedGroup.add_option("--eigen", dest="eigenfile",
+                  help="The location of the precomputed eigendecomposition for the kinship file.  These can be computed with pylmmKinship.py.")
+advancedGroup.add_option("--noMean", dest="noMean", default=False,action="store_true",
+                  help="This option only applies when --cofile is used.  When covfile is provided, the program will automatically add a global mean covariate to the model unless this option is specified.")
+
+basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
+
+advancedGroup.add_option("-v", "--verbose",
+                  action="store_true", dest="verbose", default=False,
+                  help="Print extra info")
+
+# Experimental Group Options
+experimentalGroup.add_option("--kfile2", dest="kfile2", 
+                  help="The location of a second kinship file.  This file has the same format as the first kinship.  This might be used if you want to correct for another form of confounding.")
+
+parser.add_option_group(basicGroup)
+parser.add_option_group(advancedGroup)
+parser.add_option_group(experimentalGroup)
+
+(options, args) = parser.parse_args()
+
+import sys
+import os
+import numpy as np
+from scipy import linalg
+from pylmm.lmm import LMM
+from pylmm import input
+
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+if len(args) != 1:  
+   parser.print_help()
+   sys.exit()
+
+outFile = args[0]
+
+if not options.tfile and not options.bfile and not options.emmaFile: 
+#if not options.pfile and not options.tfile and not options.bfile: 
+   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an EMMA formatted file (--emmaSNP).")
+if not options.kfile:
+   parser.error("Please provide a pre-computed kinship file")
+
+numThreads = None
+if options.numThreads:
+   numThreads = int(options.numThreads)
+
+# READING PLINK input
+if options.verbose: sys.stderr.write("Reading SNP input...\n")
+if options.bfile: IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+elif options.tfile: IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+#elif options.pfile: IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+elif options.emmaFile: IN = input.plink(options.emmaFile,type='emma', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+else: parser.error("You must provide at least one PLINK input file base")
+
+if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos') and not os.path.isfile(options.emmaPheno):
+   parser.error("No .pheno file exist for %s.  Please provide a phenotype file using the --phenofile or --emmaPHENO argument." % (options.phenoFile or IN.fbase + '.phenos'))
+
+# Read the emma phenotype file if provided.
+# Format should be rows are phenotypes and columns are individuals.
+if options.emmaPheno:
+   f = open(options.emmaPheno,'r')
+   P = []
+   for line in f:
+      v = line.strip().split()
+      p = []
+      for x in v:
+	 try:
+	    p.append(float(x))
+	 except: p.append(np.nan)
+      P.append(p)
+   f.close()
+   IN.phenos = np.array(P).T
+
+# READING Covariate File
+if options.covfile: 
+   if options.verbose: 
+      sys.stderr.write("Reading covariate file...\n")
+   P = IN.getCovariates(options.covfile) 
+   if options.noMean: 
+      X0 = P
+   else: 
+      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
+elif options.emmaCov:
+   if options.verbose: 
+      sys.stderr.write("Reading covariate file...\n")
+   P = IN.getCovariatesEMMA(options.emmaCov) 
+   if options.noMean: 
+      X0 = P
+   else: 
+      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
+else: 
+   X0 = np.ones((IN.phenos.shape[0],1))
+
+if np.isnan(X0).sum(): 
+      parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case.  Either remove those individuals with missing values or replace them in some way.")
+
+# READING Kinship - major bottleneck for large datasets
+if options.verbose: sys.stderr.write("Reading kinship...\n")
+begin = time.time()
+# This method seems to be the fastest and works if you already know the size of the matrix
+if options.kfile[-3:] == '.gz':
+   import gzip
+   f = gzip.open(options.kfile,'r')
+   F = f.read() # might exhaust mem if the file is huge
+   K = np.fromstring(F,sep=' ') # Assume that space separated
+   f.close()
+else: 
+   K = np.fromfile(open(options.kfile,'r'),sep=" ")
+K.resize((len(IN.indivs),len(IN.indivs)))
+end = time.time()
+# Other slower ways
+#K = np.loadtxt(options.kfile)
+#K = np.genfromtxt(options.kfile)
+if options.verbose: sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin))
+
+if options.kfile2:
+   if options.verbose: sys.stderr.write("Reading second kinship...\n")
+   begin = time.time()
+   # This method seems to be the fastest and works if you already know the size of the matrix
+   if options.kfile2[-3:] == '.gz':
+      import gzip
+      f = gzip.open(options.kfile2,'r')
+      F = f.read() # might exhaust mem if the file is huge
+      K2 = np.fromstring(F,sep=' ') # Assume that space separated
+      f.close()
+   else: K2 = np.fromfile(open(options.kfile2,'r'),sep=" ")
+   K2.resize((len(IN.indivs),len(IN.indivs)))
+   end = time.time()
+   if options.verbose: sys.stderr.write("Read the %d x %d second kinship matrix in %0.3fs \n" % (K2.shape[0],K2.shape[1],end-begin))
+
+# PROCESS the phenotype data -- Remove missing phenotype values
+# Keep will now index into the "full" data to select what we keep (either everything or a subset of non missing data
+Y = IN.phenos[:,options.pheno]
+v = np.isnan(Y)
+keep = True - v
+if v.sum():
+   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
+   Y = Y[keep]
+   X0 = X0[keep,:]
+   K = K[keep,:][:,keep]
+   if options.kfile2: K2 = K2[keep,:][:,keep]
+   Kva = []
+   Kve = []
+
+# Only load the decomposition if we did not remove individuals.
+# Otherwise it would not be correct and we would have to compute it again.
+if not v.sum() and options.eigenfile:
+   if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
+   Kva = np.load(options.eigenfile + ".Kva")
+   Kve = np.load(options.eigenfile + ".Kve")
+else: 
+   Kva = []
+   Kve = []
+
+# CREATE LMM object for association
+n = K.shape[0]
+if not options.kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
+else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2)
+
+# Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
+if not options.refit: 
+   if options.verbose: sys.stderr.write("Computing fit for null model\n")
+   L.fit()
+   if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
+   if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW))
+
+def compute_snp(collect):
+   snp = collect[0]
+   id = collect[1]
+   # result = []
+   # Check SNPs for missing values
+   x = snp[keep].reshape((n,1))  # all the SNPs
+   v = np.isnan(x).reshape((-1,))
+   if v.sum():
+      keeps = True - v
+      xs = x[keeps,:]
+      if keeps.sum() <= 1 or xs.var() <= 1e-6: 
+         # PS.append(np.nan)
+         # TS.append(np.nan)
+         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
+         # continue
+         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
+
+      # Its ok to center the genotype -  I used options.normalizeGenotype to 
+      # force the removal of missing genotypes as opposed to replacing them with MAF.
+      if not options.normalizeGenotype:
+         xs = (xs - xs.mean()) / np.sqrt(xs.var())
+      Ys = Y[keeps]
+      X0s = X0[keeps,:]
+      Ks = K[keeps,:][:,keeps]
+      if options.kfile2:
+         K2s = K2[keeps,:][:,keeps]
+         Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=options.verbose,K2=K2s)
+      else:
+         Ls = LMM(Ys,Ks,X0=X0s,verbose=options.verbose)
+      if options.refit:
+        Ls.fit(X=xs,REML=options.REML)
+      else:
+         #try:
+         Ls.fit(REML=options.REML)
+         #except: pdb.set_trace()
+      ts,ps,beta,betaVar = Ls.association(xs,REML=options.REML,returnBeta=True)
+   else:
+      if x.var() == 0: 
+         # PS.append(np.nan)
+         # TS.append(np.nan)
+         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
+         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
+         # continue
+
+      if options.refit:
+         L.fit(X=x,REML=options.REML)
+      # This is where it happens
+      ts,ps,beta,betaVar = L.association(x,REML=options.REML,returnBeta=True)
+   
+   return formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)
+   # PS.append(ps)
+   # TS.append(ts)
+   # return len(result)
+   # compute.q.put(result)
+   # return None
+
+def f_init(q):
+   compute_snp.q = q
+
+# Set up the pool
+# mp.set_start_method('spawn')
+q = mp.Queue()
+p = mp.Pool(numThreads, f_init, [q])
+collect = []
+   
+# Buffers for pvalues and t-stats
+# PS = []
+# TS = []
+count = 0
+out = open(outFile,'w')
+printOutHead()
+
+for snp_id in IN:
+   count += 1
+   if count % 1000 == 0:
+      if options.verbose:
+         sys.stderr.write("At SNP %d\n" % count)
+      # if count>8000 :
+      #    break         # for testing only
+   if count % 100 == 0:
+      for line in p.imap(compute_snp,collect):
+         out.write(line)
+      collect = []
+      
+   collect.append(snp_id)
+for line in p.imap(compute_snp,collect):
+   out.write(line)
+
+
diff --git a/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py b/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py
new file mode 100755
index 00000000..c9a73ece
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py
@@ -0,0 +1,186 @@
+#!/usr/bin/python
+
+# pylmm is a python-based linear mixed-model solver with applications to GWAS
+
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+
+#The program is free for academic use. Please contact Nick Furlotte
+#<nick.furlotte@gmail.com> if you are interested in using the software for
+#commercial purposes.
+
+#The software must not be modified and distributed without prior
+#permission of the author.
+
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import pdb
+
+from optparse import OptionParser,OptionGroup
+usage = """usage: %prog [options] --[tfile | bfile] plinkFileBase outfile
+
+"""
+
+parser = OptionParser(usage=usage)
+
+basicGroup = OptionGroup(parser, "Basic Options")
+#advancedGroup = OptionGroup(parser, "Advanced Options")
+
+#basicGroup.add_option("--pfile", dest="pfile",
+#                  help="The base for a PLINK ped file")
+basicGroup.add_option("--tfile", dest="tfile",
+                  help="The base for a PLINK tped file")
+basicGroup.add_option("--bfile", dest="bfile",
+                  help="The base for a PLINK binary ped file")
+basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the rows and snps on the columns.")
+basicGroup.add_option("--emmaNumSNPs", dest="numSNPs", type="int", default=0,
+		     help="When providing the emmaSNP file you need to specify how many snps are in the file")
+
+basicGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
+basicGroup.add_option("-n", default=1000,dest="computeSize", type="int", help="The maximum number of SNPs to read into memory at once (default 1000).  This is important when there is a large number of SNPs, because memory could be an issue.")
+basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
+
+basicGroup.add_option("-v", "--verbose",
+                  action="store_true", dest="verbose", default=False,
+                  help="Print extra info")
+
+parser.add_option_group(basicGroup)
+#parser.add_option_group(advancedGroup)
+
+(options, args) = parser.parse_args()
+if len(args) != 1: 
+   parser.print_help()
+   sys.exit()
+
+outFile = args[0]
+
+import sys
+import os
+import numpy as np
+from scipy import linalg
+from pylmm.lmm import calculateKinship
+from pylmm import input
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+if not options.tfile and not options.bfile and not options.emmaFile: 
+   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
+
+numThreads = None
+if options.numThreads:
+   numThreads = int(options.numThreads)
+   
+if options.verbose: sys.stderr.write("Reading PLINK input...\n")
+if options.bfile: IN = input.plink(options.bfile,type='b')
+elif options.tfile: IN = input.plink(options.tfile,type='t')
+#elif options.pfile: IN = input.plink(options.pfile,type='p')
+elif options.emmaFile: 
+   if not options.numSNPs: parser.error("You must provide the number of SNPs when specifying an emma formatted file.")
+   IN = input.plink(options.emmaFile,type='emma')
+else: parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
+
+def compute_W(job):
+   """
+   Read 1000 SNPs at a time into matrix and return the result
+   """
+   n = len(IN.indivs)
+   m = options.computeSize
+   W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
+   for j in range(0,options.computeSize):
+      row = job*m + j
+      if row >= IN.numSNPs:
+         W = W[:,range(0,j)]
+         break
+      snp,id = IN.next()
+      if snp.var() == 0:
+         continue
+      W[:,j] = snp  # set row to list of SNPs
+   return W
+
+def compute_dgemm(job,W):
+   """
+   Compute Kinship(W)*j
+
+   For every set of SNPs dgemm is used to multiply matrices T(W)*W
+   """
+   res = None
+   try:
+      res = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False)
+   except AttributeError:
+      res = np.dot(W,W.T) 
+   compute_dgemm.q.put([job,res])
+   return job
+
+def f_init(q):
+    compute_dgemm.q = q
+
+n = len(IN.indivs)
+# m = options.computeSize
+# jobsize=m
+
+IN.getSNPIterator()
+# Annoying hack to get around the fact that it is expensive to determine the number of SNPs in an emma file
+if options.emmaFile: IN.numSNPs = options.numSNPs
+# i = 0
+
+# mp.set_start_method('spawn')
+q = mp.Queue()
+p = mp.Pool(numThreads, f_init, [q])
+iterations = IN.numSNPs/options.computeSize+1
+# jobs = range(0,8) # range(0,iterations)
+
+results = []
+
+K = np.zeros((n,n))  # The Kinship matrix has dimension individuals x individuals
+
+completed = 0
+
+# for job in range(8):
+for job in range(iterations):
+   if options.verbose:
+      sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
+   W = compute_W(job)
+   results.append(p.apply_async(compute_dgemm, (job,W)))   
+   # Do we have a result?
+   try: 
+     j,x = q.get_nowait()
+     if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+     K_j = x
+     # print j,K_j[:,0]
+     K = K + K_j
+     completed += 1
+   except Queue.Empty:
+     pass
+
+for job in range(len(results)-completed):
+   j,x = q.get()
+   if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+   K_j = x
+   # print j,K_j[:,0]
+   K = K + K_j
+        
+K = K / float(IN.numSNPs)
+if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
+np.savetxt(outFile,K)
+
+if options.saveEig:
+   if options.verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
+   Kva,Kve = linalg.eigh(K)
+   if options.verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
+   np.savetxt(outFile+".kva",Kva)
+   np.savetxt(outFile+".kve",Kve)
+      
+
+
+
diff --git a/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py b/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py
new file mode 100755
index 00000000..0c96d986
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py
@@ -0,0 +1,77 @@
+from __future__ import absolute_import, print_function, division
+
+from base import data_set
+from base.species import TheSpecies
+    
+    def run(dataset_name, vals, temp_uuid):
+        """Generates p-values for each marker"""
+
+        tempdata = temp_data.TempData(temp_uuid)
+        
+        dataset = data_set.create_dataset(dataset_name)
+        species = TheSpecies(dataset=dataset)
+
+        samples = [] # Want only ones with values
+        vals = vals
+
+        for sample in dataset.group.samplelist:
+            samples.append(str(sample))
+            
+        gen_data(dataset, vals, tempdata)
+
+
+    def gen_data(dataset, vals)
+        dataset.group.get_markers()
+
+        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in vals])
+
+        if dataset.group.species == "human":
+            p_values, t_stats = gen_human_results(pheno_vector, tempdata)
+        else:
+            genotype_data = [marker['genotypes'] for marker in dataset.group.markers.markers]
+            
+            no_val_samples = self.identify_empty_samples()
+            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+            
+            genotype_matrix = np.array(trimmed_genotype_data).T
+            
+            #print("pheno_vector: ", pf(pheno_vector))
+            #print("genotype_matrix: ", pf(genotype_matrix))
+            #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
+            
+            t_stats, p_values = lmm.run(
+                pheno_vector,
+                genotype_matrix,
+                restricted_max_likelihood=True,
+                refit=False,
+                temp_data=tempdata
+            )
+            #print("p_values:", p_values)
+        
+        self.dataset.group.markers.add_pvalues(p_values)
+        return self.dataset.group.markers.markers
+
+
+    def gen_human_results(self, pheno_vector, tempdata):
+        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
+
+        plink_input = input.plink(file_base, type='b')
+        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
+
+        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
+        covariate_matrix = np.ones((pheno_vector.shape[0],1))
+        kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
+        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
+
+        p_values, t_stats = lmm.run_human(
+                pheno_vector,
+                covariate_matrix,
+                input_file_name,
+                kinship_matrix,
+                loading_progress=tempdata
+            )
+
+        return p_values, t_stats
+    
+if __name__ == '__main__':
+    run(dataset_name, vals, temp_uuid)
\ No newline at end of file
-- 
cgit v1.2.3


From 7fee695b79984912dd610a55e84db602780becac Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Feb 2015 15:02:30 +0300
Subject: Splitting out handlers

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8de3b3a7..d7d60e6e 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -714,6 +714,7 @@ class LMM:
        pl.ylabel("Probability of data")
        pl.title(title)
 
+# This is the main function used by Genenetwork2 (with environment)
 def gn2_main():
     parser = argparse.ArgumentParser(description='Run pyLMM')
     parser.add_argument('-k', '--key')
@@ -755,6 +756,15 @@ def gn2_main():
     Redis.rpush(results_key, json_results)
     Redis.expire(results_key, 60*60)
 
+# This is the main version used without Genenetwork2's environment
+def cli_main():
+    ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
+                       covariate_matrix = np.array(params['covariate_matrix']),
+                       plink_input_file = params['input_file_name'],
+                       kinship_matrix = np.array(params['kinship_matrix']),
+                       refit = params['refit'],
+                       tempdata = tempdata)
+        
 if __name__ == '__main__':
     if has_gn2:
         gn2_main()
-- 
cgit v1.2.3


From d73ef7975657ed937d4a603ca32d6ca547292ec5 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 28 Feb 2015 12:31:46 +0300
Subject: Started adaptations for introducing multi-core version

---
 wqflask/wqflask/my_pylmm/pyLMM/input.py     |  2 +-
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py       | 46 +++++++++++-------------
 wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py | 55 +++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+), 26 deletions(-)
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index dfe28a4e..fdf02e42 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -260,4 +260,4 @@ class plink:
             L.append(D[self.indivs[i]])
         P = P[L,:]
      
-        return P
\ No newline at end of file
+        return P
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d7d60e6e..54a40e1f 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -47,12 +47,13 @@ print("sys.path2:", sys.path)
 
 has_gn2=True
 
+from utility.benchmark import Bench
+from utility import temp_data
+
 try:
-    from utility.benchmark import Bench
-    from utility import temp_data
     from wqflask.my_pylmm.pyLMM import chunks
 except ImportError:
-    print("WARNING: Standalone version\n")
+    print("WARNING: Standalone version missing the Genenetwork2 environment\n")
     has_gn2=False
     pass
 
@@ -253,20 +254,21 @@ def run_other(pheno_vector,
         genotype_matrix,
         restricted_max_likelihood=True,
         refit=False,
-        tempdata=None):
+        tempdata=None,      # <---- can not be None
+        is_testing=False):
+    
     """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
     
     restricted_max_likelihood -- whether to use restricted max likelihood; True or False
     refit -- whether to refit the variance component for each marker
     temp_data -- TempData object that stores the progress for each major step of the
-    calculations ("calculate_kinship" and "GWAS" take the majority of time)
+    calculations ("calculate_kinship" and "GWAS" take the majority of time) 
     
     """
     
-    
     print("In run_other")
     with Bench("Calculate Kinship"):
-        kinship_matrix = calculate_kinship(genotype_matrix, tempdata)
+        kinship_matrix = calculate_kinship(genotype_matrix, tempdata, is_testing)
     
     print("kinship_matrix: ", pf(kinship_matrix))
     print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -321,7 +323,7 @@ def matrixMult(A,B):
     return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
 
 
-def calculate_kinship(genotype_matrix, temp_data):
+def calculate_kinship(genotype_matrix, temp_data, is_testing=False):
     """
     genotype_matrix is an n x m matrix encoding SNP minor alleles.
     
@@ -331,10 +333,12 @@ def calculate_kinship(genotype_matrix, temp_data):
     """
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    print("n is:", n)
-    print("m is:", m)
+    print("genotype matrix n is:", n)
+    print("genotype matrix m is:", m)
     keep = []
     for counter in range(m):
+        if is_testing and counter>8:
+            break
         #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
         #Checks if any values in column are not numbers
         not_number = np.isnan(genotype_matrix[:,counter])
@@ -478,7 +482,7 @@ class LMM:
           is not done consistently.
  
     """
-    def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True):
+    def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True,is_testing=False):
  
        """
        The constructor takes a phenotype vector or array of size n.
@@ -487,7 +491,8 @@ class LMM:
        X0 is an optional covariate matrix of size n x q, where there are q covariates.
        When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
        """
- 
+
+       self.is_testing = True
        if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
        self.verbose = verbose
  
@@ -507,14 +512,14 @@ class LMM:
        self.nonmissing = x
  
        print("this K is:", pf(K))
- 
+       
        if len(Kva) == 0 or len(Kve) == 0:
           if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
           begin = time.time()
           Kva,Kve = linalg.eigh(K)
           end = time.time()
           if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
-       
+
        self.K = K
        self.Kva = Kva
        self.Kve = Kve
@@ -745,7 +750,7 @@ def gn2_main():
                   genotype_matrix = np.array(params['genotype_matrix']),
                   restricted_max_likelihood = params['restricted_max_likelihood'],
                   refit = params['refit'],
-                  tempdata = tempdata)
+                  tempdata = tempdata, is_testing=False)
         
     results_key = "pylmm:results:" + params['temp_uuid']
 
@@ -755,21 +760,12 @@ def gn2_main():
     #Pushing json_results into a list where it is the only item because blpop needs a list
     Redis.rpush(results_key, json_results)
     Redis.expire(results_key, 60*60)
-
-# This is the main version used without Genenetwork2's environment
-def cli_main():
-    ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
-                       covariate_matrix = np.array(params['covariate_matrix']),
-                       plink_input_file = params['input_file_name'],
-                       kinship_matrix = np.array(params['kinship_matrix']),
-                       refit = params['refit'],
-                       tempdata = tempdata)
         
 if __name__ == '__main__':
     if has_gn2:
         gn2_main()
     else:
-        cli_main()
+        print("Run from pylmmGWAS.py instead")
 
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
new file mode 100644
index 00000000..abb72081
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
@@ -0,0 +1,55 @@
+import sys
+import time
+import numpy as np
+from numpy.distutils.system_info import get_info;
+from scipy import linalg
+from scipy import optimize
+from scipy import stats
+
+useNumpy = None
+hasBLAS = None
+
+def matrix_initialize(useBLAS):
+    global useNumpy  # module based variable
+    if useBLAS and useNumpy == None:
+        print get_info('blas_opt')
+        try:
+            linalg.fblas
+            sys.stderr.write("INFO: using linalg.fblas\n")
+            useNumpy = False
+            hasBLAS  = True
+        except AttributeError:
+            sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n")
+            useNumpy = True
+    else:
+        sys.stderr.write("INFO: using numpy.dot\n")
+        useNumpy=True
+        
+def matrixMult(A,B):
+   global useNumpy  # module based variable
+
+   if useNumpy:
+       return np.dot(A,B)
+
+   # If the matrices are in Fortran order then the computations will be faster
+   # when using dgemm.  Otherwise, the function will copy the matrix and that takes time.
+   if not A.flags['F_CONTIGUOUS']:
+      AA = A.T
+      transA = True
+   else:
+      AA = A
+      transA = False
+
+   if not B.flags['F_CONTIGUOUS']:
+      BB = B.T
+      transB = True
+   else:
+      BB = B
+      transB = False
+
+   return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
+
+def matrixMultT(M):
+    # res = np.dot(W,W.T)
+    # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False)
+    return matrixMult(M,M.T)
-- 
cgit v1.2.3


From c523f48392d7adf1ec2f5cb84a4e5ab3a594e569 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 1 Mar 2015 12:27:41 +0300
Subject: Add warning to running lmm.py from the command line

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 54a40e1f..d3214d6c 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -762,6 +762,7 @@ def gn2_main():
     Redis.expire(results_key, 60*60)
         
 if __name__ == '__main__':
+    print "WARNING: Calling pylmm from lmm.py will become OBSOLETE, use run_pylmm instead!"
     if has_gn2:
         gn2_main()
     else:
-- 
cgit v1.2.3


From 52204e9f7ec8fce00c0244ce5286fa394615604c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 1 Mar 2015 13:15:30 +0300
Subject: Warning

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d3214d6c..f69d4958 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -762,7 +762,7 @@ def gn2_main():
     Redis.expire(results_key, 60*60)
         
 if __name__ == '__main__':
-    print "WARNING: Calling pylmm from lmm.py will become OBSOLETE, use run_pylmm instead!"
+    print "WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!"
     if has_gn2:
         gn2_main()
     else:
-- 
cgit v1.2.3


From b8fd98ed442fd7e0188652077d3d0b807c50417f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 3 Mar 2015 17:19:39 +0300
Subject: fix print

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index f69d4958..3cbab149 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -762,7 +762,7 @@ def gn2_main():
     Redis.expire(results_key, 60*60)
         
 if __name__ == '__main__':
-    print "WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!"
+    print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
     if has_gn2:
         gn2_main()
     else:
-- 
cgit v1.2.3


From d422300b8781049a903c68d9cc342c4596079ccd Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 5 Mar 2015 14:56:25 +0300
Subject: Removed execution flag x from files

---
 wqflask/wqflask/my_pylmm/pyLMM/__init__.py      | 0
 wqflask/wqflask/my_pylmm/pyLMM/chunks.py        | 0
 wqflask/wqflask/my_pylmm/pyLMM/input.py         | 0
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py           | 0
 wqflask/wqflask/my_pylmm/pyLMM/process_plink.py | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/__init__.py
 mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/chunks.py
 mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/input.py
 mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm.py
 mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/process_plink.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
old mode 100755
new mode 100644
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py b/wqflask/wqflask/my_pylmm/pyLMM/chunks.py
old mode 100755
new mode 100644
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
old mode 100755
new mode 100644
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
old mode 100755
new mode 100644
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py b/wqflask/wqflask/my_pylmm/pyLMM/process_plink.py
old mode 100755
new mode 100644
-- 
cgit v1.2.3


From 8053869aa61fb430b89a32b0f8024fc133cea28f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 5 Mar 2015 16:12:14 +0300
Subject: Helpers to split out functionality

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 36 ++++++++++++++++++++++++++++
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py     | 20 ++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
new file mode 100644
index 00000000..4147d90b
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -0,0 +1,36 @@
+# This is a converter for common LMM formats, so as to keep complexity
+# outside the main routines. 
+
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from optparse import OptionParser
+
+usage = """
+Convert files for runlmm.py processing
+"""
+
+parser = OptionParser(usage=usage)
+# parser.add_option("-f", "--file", dest="input file",
+#                   help="In", metavar="FILE")
+parser.add_option("--kinship",
+                  help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
+parser.add_option("-q", "--quiet",
+                  action="store_false", dest="verbose", default=True,
+                  help="don't print status messages to stdout")
+
+(options, args) = parser.parse_args()
+
+
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
new file mode 100644
index 00000000..df86f718
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -0,0 +1,20 @@
+# This is the LMM runner that calls the possible methods using command line
+# switches. It acts as a multiplexer where all the invocation complexity
+# is kept outside the main LMM routines.
+#
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
-- 
cgit v1.2.3


From 0754dbf77f4362beaef45b0ac651676d47659b70 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 5 Mar 2015 16:50:14 +0300
Subject: Convert textual kinship file

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 35 ++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 4147d90b..35fccef4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -17,15 +17,25 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from optparse import OptionParser
+import sys
+import os
+import numpy as np
+# from lmm import LMM, run_other
+import input
+
 
 usage = """
-Convert files for runlmm.py processing
+python convertlmm.py [--kinship] infile 
+
+  Convert files for runlmm.py processing. Writes to stdout.
+
+  try --help for more information
 """
 
 parser = OptionParser(usage=usage)
 # parser.add_option("-f", "--file", dest="input file",
 #                   help="In", metavar="FILE")
-parser.add_option("--kinship",
+parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
@@ -33,4 +43,25 @@ parser.add_option("-q", "--quiet",
 
 (options, args) = parser.parse_args()
 
+if len(args) == 0:
+    print usage
+    sys.exit(1)
 
+if options.kinship:
+    is_header = True
+    assert(len(args)==1)
+    count = 0
+    for line in open(args[0],'r'):
+        count += 1
+        if is_header:
+            size = len(line.split())
+            print "# Kinship format version 1.0"
+            print "# Size=",size
+            for i in range(size):
+                sys.stdout.write("\t"+str(i+1))
+            sys.stdout.write("\n")
+            is_header = False
+        sys.stdout.write(str(count))
+        sys.stdout.write("\t")
+        sys.stdout.write("\t".join(line.split()))
+        sys.stdout.write("\n")
-- 
cgit v1.2.3


From b9d8057c2017ade6dd6ce54c45d718beb7c25c6a Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 5 Mar 2015 17:27:36 +0300
Subject: Kinship file format support (reader and writer)

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py |  1 -
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py     | 49 ++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 35fccef4..012e96b2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -23,7 +23,6 @@ import numpy as np
 # from lmm import LMM, run_other
 import input
 
-
 usage = """
 python convertlmm.py [--kinship] infile 
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index df86f718..4a640d6d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -17,4 +17,53 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from optparse import OptionParser
+import sys
+import os
+import numpy as np
+# from lmm import LMM, run_other
+import csv
 
+
+usage = """
+python runlmm.py [options] command
+
+  runlmm.py processing multiplexer reads standard input types and calls the routines
+
+  Current commands are:
+
+    parse        : only parse input files
+
+  try --help for more information
+"""
+
+parser = OptionParser(usage=usage)
+# parser.add_option("-f", "--file", dest="input file",
+#                   help="In", metavar="FILE")
+parser.add_option("--kinship",dest="kinship",
+                  help="Kinship file format")
+parser.add_option("-q", "--quiet",
+                  action="store_false", dest="verbose", default=True,
+                  help="don't print status messages to stdout")
+
+(options, args) = parser.parse_args()
+
+if len(args) != 1:
+    print usage
+    sys.exit(1)
+
+cmd = args[0]
+print "Command: ",cmd
+
+if options.kinship:
+    K1 = []
+    print options.kinship
+    with open(options.kinship,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Kinship format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            ns = np.genfromtxt(row[1:])
+            K1.append(ns) # <--- slow
+    K = np.array(K1)
-- 
cgit v1.2.3


From 15b3b5b103fdee72a8dc0dac1c9c29e5680fbef9 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 7 Mar 2015 11:02:00 +0300
Subject: Kinship converter

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 35 +++++++++++++++++-----------
 1 file changed, 21 insertions(+), 14 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 012e96b2..2af84477 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -24,33 +24,38 @@ import numpy as np
 import input
 
 usage = """
-python convertlmm.py [--kinship] infile 
+python convertlmm.py [--kinship kfile] 
 
   Convert files for runlmm.py processing. Writes to stdout.
 
   try --help for more information
 """
 
-parser = OptionParser(usage=usage)
-# parser.add_option("-f", "--file", dest="input file",
-#                   help="In", metavar="FILE")
-parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
-                  help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
-parser.add_option("-q", "--quiet",
+# if len(args) == 0:
+#     print usage
+#     sys.exit(1)
+
+option_parser = OptionParser(usage=usage)
+option_parser.add_option("--kinship", dest="kinship",
+                  help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program")
+# option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
+#                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
+option_parser.add_option("--prefix", dest="prefix",
+                  help="Output prefix for output file(s)")
+option_parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
+option_parser.add_option("-v", "--verbose",
+                  action="store_true", dest="verbose", default=False,
+                  help="Print extra info")
 
-(options, args) = parser.parse_args()
-
-if len(args) == 0:
-    print usage
-    sys.exit(1)
+(options, args) = option_parser.parse_args()
 
 if options.kinship:
     is_header = True
-    assert(len(args)==1)
     count = 0
-    for line in open(args[0],'r'):
+    sys.stderr.write("Converting "+options.kinship+"\n")
+    for line in open(options.kinship,'r'):
         count += 1
         if is_header:
             size = len(line.split())
@@ -64,3 +69,5 @@ if options.kinship:
         sys.stdout.write("\t")
         sys.stdout.write("\t".join(line.split()))
         sys.stdout.write("\n")
+
+sys.stderr.write("Converting done\n")
-- 
cgit v1.2.3


From 11856132296916f7788437681bea9a2c71fe1e50 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 7 Mar 2015 12:21:29 +0300
Subject: Write phenotypes

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 72 ++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 10 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 2af84477..0604762e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -38,6 +38,8 @@ python convertlmm.py [--kinship kfile]
 option_parser = OptionParser(usage=usage)
 option_parser.add_option("--kinship", dest="kinship",
                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program")
+option_parser.add_option("--plink", dest="plink",
+                  help="Parse a phenotype file (PLINK style)")
 # option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
 #                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
 option_parser.add_option("--prefix", dest="prefix",
@@ -51,23 +53,73 @@ option_parser.add_option("-v", "--verbose",
 
 (options, args) = option_parser.parse_args()
 
+writer = None
+
+def msg(s):
+    sys.stderr.write("INFO: ")
+    sys.stderr.write(s)
+    sys.stderr.write("\n")
+    
+def wr(s):
+    if writer:
+        writer.write(s)
+    else:
+        sys.stdout.write(s)
+
+def wrln(s):
+    wr(s)
+    wr("\n")
+            
 if options.kinship:
     is_header = True
     count = 0
-    sys.stderr.write("Converting "+options.kinship+"\n")
+    msg("Converting "+options.kinship)
+    if options.prefix:
+        writer = open(options.prefix+".kin","w")
     for line in open(options.kinship,'r'):
         count += 1
         if is_header:
             size = len(line.split())
-            print "# Kinship format version 1.0"
-            print "# Size=",size
+            wrln("# Kinship format version 1.0")
+            wrln("# Size="+str(size))
             for i in range(size):
-                sys.stdout.write("\t"+str(i+1))
-            sys.stdout.write("\n")
+                wr("\t"+str(i+1))
+            wr("\n")
             is_header = False
-        sys.stdout.write(str(count))
-        sys.stdout.write("\t")
-        sys.stdout.write("\t".join(line.split()))
-        sys.stdout.write("\n")
+        wr(str(count))
+        wr("\t")
+        wr("\t".join(line.split()))
+        wr("\n")
+    msg(str(count)+" lines written")
+
+if options.plink:
+    # Because plink does not track size we need to read the whole thing first
+    msg("Converting "+options.plink)
+    phenos = []
+    count = 0
+    count_pheno = None
+    for line in open(options.plink,'r'):
+        count += 1
+        list = line.split()
+        pcount = len(list)-2
+        assert(pcount > 0)
+        if count_pheno == None:
+            count_pheno = pcount
+        assert(count_pheno == pcount)
+        row = [list[0]]+list[2:]
+        phenos.append(row)
 
-sys.stderr.write("Converting done\n")
+    if options.prefix:
+        writer = open(options.prefix+".pheno","w")
+    wrln("# Phenotype format version 1.0")
+    wrln("# Individuals = "+str(count))
+    wrln("# Phenotypes = "+str(count_pheno))
+    for i in range(count_pheno):
+        wr("\t"+str(i+1))
+        wr("\n")
+    for i in range(count):
+        wr("\t".join(phenos[i]))
+        wr("\n")
+    msg(str(count)+" lines written")
+            
+msg("Converting done")
-- 
cgit v1.2.3


From 9485cbbad2596b3105ad995e15ac3cb3dca6615d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 08:43:12 +0300
Subject: Parse new phenotype format

---
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 4a640d6d..27afd6f4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -41,7 +41,9 @@ parser = OptionParser(usage=usage)
 # parser.add_option("-f", "--file", dest="input file",
 #                   help="In", metavar="FILE")
 parser.add_option("--kinship",dest="kinship",
-                  help="Kinship file format")
+                  help="Kinship file format 1.0")
+parser.add_option("--pheno",dest="pheno",
+                  help="Phenotype file format 1.0")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
@@ -67,3 +69,19 @@ if options.kinship:
             ns = np.genfromtxt(row[1:])
             K1.append(ns) # <--- slow
     K = np.array(K1)
+
+if options.pheno:
+    Y1 = []
+    print options.pheno
+    with open(options.pheno,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Phenotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            ns = np.genfromtxt(row[1:])
+            Y1.append(ns) # <--- slow
+    Y = np.array(Y1)
+
+print Y
-- 
cgit v1.2.3


From d76d0fa9057aa774cc9675584b4737dda4dbf3a1 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 11:35:04 +0300
Subject: Parse new phenotype format

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 0604762e..3e216475 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -24,9 +24,9 @@ import numpy as np
 import input
 
 usage = """
-python convertlmm.py [--kinship kfile] 
+python convertlmm.py [--plink] [--prefix basename] [--kinship kfile] [--pheno] [--geno]
 
-  Convert files for runlmm.py processing. Writes to stdout.
+  Convert files for runlmm.py processing. Writes to stdout by default.
 
   try --help for more information
 """
@@ -38,8 +38,12 @@ python convertlmm.py [--kinship kfile]
 option_parser = OptionParser(usage=usage)
 option_parser.add_option("--kinship", dest="kinship",
                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program")
-option_parser.add_option("--plink", dest="plink",
-                  help="Parse a phenotype file (PLINK style)")
+option_parser.add_option("--pheno", dest="pheno",
+                         help="Parse a phenotype file (use with --plink only)")
+option_parser.add_option("--geno", dest="geno",
+                         help="Parse a genotype file (use with --plink only)")
+option_parser.add_option("--plink", dest="plink", default=False,
+                  help="Parse PLINK style")
 # option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
 #                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
 option_parser.add_option("--prefix", dest="prefix",
@@ -92,7 +96,9 @@ if options.kinship:
         wr("\n")
     msg(str(count)+" lines written")
 
-if options.plink:
+if options.pheno:
+    if not options.plink:
+        raise Exception("Use --plink switch")
     # Because plink does not track size we need to read the whole thing first
     msg("Converting "+options.plink)
     phenos = []
@@ -121,5 +127,6 @@ if options.plink:
         wr("\t".join(phenos[i]))
         wr("\n")
     msg(str(count)+" lines written")
-            
+
+    
 msg("Converting done")
-- 
cgit v1.2.3


From b955d8c4d2b1c6857905513a5de77039892fe18a Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 11:44:17 +0300
Subject: convertlmm.py: plink support

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 3e216475..3932a4d9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -24,7 +24,7 @@ import numpy as np
 import input
 
 usage = """
-python convertlmm.py [--plink] [--prefix basename] [--kinship kfile] [--pheno] [--geno]
+python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--pheno pname] [--geno gname]
 
   Convert files for runlmm.py processing. Writes to stdout by default.
 
@@ -42,7 +42,7 @@ option_parser.add_option("--pheno", dest="pheno",
                          help="Parse a phenotype file (use with --plink only)")
 option_parser.add_option("--geno", dest="geno",
                          help="Parse a genotype file (use with --plink only)")
-option_parser.add_option("--plink", dest="plink", default=False,
+option_parser.add_option("--plink", dest="plink", action="store_true", default=False,
                   help="Parse PLINK style")
 # option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
 #                   help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
@@ -78,6 +78,7 @@ if options.kinship:
     is_header = True
     count = 0
     msg("Converting "+options.kinship)
+    writer = None
     if options.prefix:
         writer = open(options.prefix+".kin","w")
     for line in open(options.kinship,'r'):
@@ -94,17 +95,17 @@ if options.kinship:
         wr("\t")
         wr("\t".join(line.split()))
         wr("\n")
-    msg(str(count)+" lines written")
+    msg(str(count)+" kinship lines written")
 
 if options.pheno:
     if not options.plink:
         raise Exception("Use --plink switch")
     # Because plink does not track size we need to read the whole thing first
-    msg("Converting "+options.plink)
+    msg("Converting "+options.pheno)
     phenos = []
     count = 0
     count_pheno = None
-    for line in open(options.plink,'r'):
+    for line in open(options.pheno,'r'):
         count += 1
         list = line.split()
         pcount = len(list)-2
@@ -115,6 +116,7 @@ if options.pheno:
         row = [list[0]]+list[2:]
         phenos.append(row)
 
+    writer = None
     if options.prefix:
         writer = open(options.prefix+".pheno","w")
     wrln("# Phenotype format version 1.0")
@@ -126,7 +128,7 @@ if options.pheno:
     for i in range(count):
         wr("\t".join(phenos[i]))
         wr("\n")
-    msg(str(count)+" lines written")
+    msg(str(count)+" pheno lines written")
 
     
 msg("Converting done")
-- 
cgit v1.2.3


From 0565d021e3c65256d53cfdfb32ccf55dacd1ecdf Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 12:41:32 +0300
Subject: convertlmm.py: options.geno

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 3932a4d9..25011b18 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -21,7 +21,8 @@ import sys
 import os
 import numpy as np
 # from lmm import LMM, run_other
-import input
+# import input
+import plink
 
 usage = """
 python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--pheno pname] [--geno gname]
@@ -77,7 +78,7 @@ def wrln(s):
 if options.kinship:
     is_header = True
     count = 0
-    msg("Converting "+options.kinship)
+    msg("Converting kinship "+options.kinship)
     writer = None
     if options.prefix:
         writer = open(options.prefix+".kin","w")
@@ -101,7 +102,7 @@ if options.pheno:
     if not options.plink:
         raise Exception("Use --plink switch")
     # Because plink does not track size we need to read the whole thing first
-    msg("Converting "+options.pheno)
+    msg("Converting pheno "+options.pheno)
     phenos = []
     count = 0
     count_pheno = None
@@ -130,5 +131,12 @@ if options.pheno:
         wr("\n")
     msg(str(count)+" pheno lines written")
 
+if options.geno:
+    if not options.plink:
+        raise Exception("Use --plink switch")
+    # msg("Converting geno "+options.geno)
+    # plink.readbim(options.geno+'.bim')
+    msg("Converting geno "+options.geno+'.bed')
+    plink.readbed(options.geno+'.bed',1000,8)
     
 msg("Converting done")
-- 
cgit v1.2.3


From eca1af83ecbca6c0ac018514f322cc5b49cfa405 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 12:43:54 +0300
Subject: plink support

---
 wqflask/wqflask/my_pylmm/pyLMM/plink.py | 106 ++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/plink.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
new file mode 100644
index 00000000..53151b21
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
@@ -0,0 +1,106 @@
+# Plink module
+#
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+# Some of the BED file parsing came from pylmm:
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# According to the PLINK information
+
+# Parse a textual BIM file and return the contents as a list of tuples
+# 
+# Extended variant information file accompanying a .bed binary genotype table.
+# 
+# A text file with no header line, and one line per variant with the following six fields:
+# 
+#     Chromosome code (either an integer, or 'X'/'Y'/'XY'/'MT'; '0' indicates unknown) or name
+#     Variant identifier
+#     Position in morgans or centimorgans (safe to use dummy value of '0')
+#     Base-pair coordinate (normally 1-based, but 0 ok; limited to 231-2)
+#     Allele 1 (corresponding to clear bits in .bed; usually minor)
+#     Allele 2 (corresponding to set bits in .bed; usually major)
+# 
+# Allele codes can contain more than one character. Variants with negative bp coordinates are ignored by PLINK. Example
+#
+#     1 mm37-1-3125499  0 3125499 1 2
+#     1 mm37-1-3125701  0 3125701 1 2
+#     1 mm37-1-3187481  0 3187481 1 2
+
+import struct
+import numpy as np
+
+def readbim(fn):
+    res = []
+    for line in open(fn):
+        list = line.split()
+        if len([True for e in list if e == 'nan']) == 0:
+          res.append( (list[0],list[1],int(list[2]),int(list[3]),int(list[4]),int(list[5])) )
+    return res
+
+# .bed (PLINK binary biallelic genotype table)
+# 
+# Primary representation of genotype calls at biallelic variants. Must be accompanied by .bim and .fam files.
+# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bed
+# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam
+# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim
+
+def readbed(fn,inds,snps):
+    bytes = inds / 4 + (inds % 4 and 1 or 0)
+    format = 'c'*bytes
+    f = open(fn,'rb')
+    magic = f.read(3)
+    assert( ":".join("{:02x}".format(ord(c)) for c in magic) == "6c:1b:01")
+
+    def formatBinaryGenotypes(X):
+        D = { \
+              '00': 0.0, \
+              '10': 0.5, \
+              '11': 1.0, \
+              '01': np.nan \
+           }
+  
+        D_tped = { \
+              '00': '1 1', \
+              '10': '1 2', \
+              '11': '2 2', \
+              '01': '0 0' \
+           }
+  
+        #D = D_tped
+              
+        G = []
+        for x in X:
+            if not len(x) == 10:
+                xx = x[2:]
+                x = '0b' + '0'*(8 - len(xx)) + xx
+            a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) 
+            L = [D[y] for y in [a,b,c,d]]
+            G += L
+        # only take the leading values because whatever is left should be null
+        G = G[:inds]
+        G = np.array(G)
+        # if normalize:
+        #     G = self.normalizeGenotype(G)
+        return G
+
+    def next():
+        X = f.read(bytes)
+        XX = [bin(ord(x)) for x in struct.unpack(format,X)]
+        # snp = 0
+        # snp = self.snpFileHandle.readline().strip().split()[1]
+        return formatBinaryGenotypes(XX)
+
+    for x in range(snps):
+        print next()
-- 
cgit v1.2.3


From aaf0f336b69fb1fb77c9dd3782ba4db0d732106c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 13:38:03 +0300
Subject: plink: no need to know the number of SNPs, but you need INDs

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 17 +++++++--
 wqflask/wqflask/my_pylmm/pyLMM/plink.py      | 56 ++++++++++++++--------------
 2 files changed, 41 insertions(+), 32 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 25011b18..d374407e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -59,6 +59,7 @@ option_parser.add_option("-v", "--verbose",
 (options, args) = option_parser.parse_args()
 
 writer = None
+num_inds = None
 
 def msg(s):
     sys.stderr.write("INFO: ")
@@ -96,6 +97,7 @@ if options.kinship:
         wr("\t")
         wr("\t".join(line.split()))
         wr("\n")
+    num_inds = count
     msg(str(count)+" kinship lines written")
 
 if options.pheno:
@@ -129,14 +131,23 @@ if options.pheno:
     for i in range(count):
         wr("\t".join(phenos[i]))
         wr("\n")
+    num_inds = count
     msg(str(count)+" pheno lines written")
 
 if options.geno:
     if not options.plink:
         raise Exception("Use --plink switch")
+    if not num_inds:
+        raise Exception("Can not figure out the number of individuals, use --pheno or --kinship")    
     # msg("Converting geno "+options.geno)
-    # plink.readbim(options.geno+'.bim')
+
+    snps = plink.readbim(options.geno+'.bim')
     msg("Converting geno "+options.geno+'.bed')
-    plink.readbed(options.geno+'.bed',1000,8)
-    
+
+    def out(i,x):
+        print i,x
+        
+    snps = plink.readbed(options.geno+'.bed',num_inds, out)
+    msg(str(count)+" geno lines written (with "+str(snps)+" snps)")
+   
 msg("Converting done")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
index 53151b21..3e850c9b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
@@ -51,19 +51,24 @@ def readbim(fn):
 
 # .bed (PLINK binary biallelic genotype table)
 # 
-# Primary representation of genotype calls at biallelic variants. Must be accompanied by .bim and .fam files.
+# Primary representation of genotype calls at biallelic variants. Must
+# be accompanied by .bim and .fam files. Basically contains num SNP
+# blocks containing IND (compressed 4 IND into a byte)
+#
+# Since it is a biallelic format it supports for every individual
+# whether the first allele is homozygous (b00), the second allele is
+# homozygous (b11), it is heterozygous (b10) or that it is missing
+# (b01).
+
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bed
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim
 
-def readbed(fn,inds,snps):
-    bytes = inds / 4 + (inds % 4 and 1 or 0)
-    format = 'c'*bytes
-    f = open(fn,'rb')
-    magic = f.read(3)
-    assert( ":".join("{:02x}".format(ord(c)) for c in magic) == "6c:1b:01")
+def readbed(fn,inds,func=None):
 
-    def formatBinaryGenotypes(X):
+    # For every SNP block fetch the individual genotypes using values
+    # 0.0 and 1.0 for homozygous and 0.5 for heterozygous alleles
+    def fetchGenotypes(X):
         D = { \
               '00': 0.0, \
               '10': 0.5, \
@@ -71,15 +76,6 @@ def readbed(fn,inds,snps):
               '01': np.nan \
            }
   
-        D_tped = { \
-              '00': '1 1', \
-              '10': '1 2', \
-              '11': '2 2', \
-              '01': '0 0' \
-           }
-  
-        #D = D_tped
-              
         G = []
         for x in X:
             if not len(x) == 10:
@@ -88,19 +84,21 @@ def readbed(fn,inds,snps):
             a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) 
             L = [D[y] for y in [a,b,c,d]]
             G += L
-        # only take the leading values because whatever is left should be null
         G = G[:inds]
         G = np.array(G)
-        # if normalize:
-        #     G = self.normalizeGenotype(G)
         return G
 
-    def next():
-        X = f.read(bytes)
-        XX = [bin(ord(x)) for x in struct.unpack(format,X)]
-        # snp = 0
-        # snp = self.snpFileHandle.readline().strip().split()[1]
-        return formatBinaryGenotypes(XX)
-
-    for x in range(snps):
-        print next()
+    bytes = inds / 4 + (inds % 4 and 1 or 0)
+    format = 'c'*bytes
+    count = 0
+    with open(fn,'rb') as f:
+        magic = f.read(3)
+        assert( ":".join("{:02x}".format(ord(c)) for c in magic) == "6c:1b:01")
+        while True:
+            count += 1
+            X = f.read(bytes)
+            if not X:
+                return(count-1)
+            XX = [bin(ord(x)) for x in struct.unpack(format,X)]
+            xs = fetchGenotypes(XX)
+            func(count,xs)
-- 
cgit v1.2.3


From 0919dd489deeb736b08c50d4b7636b6af00d92c8 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 13:51:13 +0300
Subject: Not using numpy - is part of the caller

---
 wqflask/wqflask/my_pylmm/pyLMM/plink.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
index 3e850c9b..75d51c3b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
@@ -39,7 +39,7 @@
 #     1 mm37-1-3187481  0 3187481 1 2
 
 import struct
-import numpy as np
+# import numpy as np
 
 def readbim(fn):
     res = []
@@ -73,7 +73,7 @@ def readbed(fn,inds,func=None):
               '00': 0.0, \
               '10': 0.5, \
               '11': 1.0, \
-              '01': np.nan \
+              '01': float('nan') \
            }
   
         G = []
@@ -85,7 +85,7 @@ def readbed(fn,inds,func=None):
             L = [D[y] for y in [a,b,c,d]]
             G += L
         G = G[:inds]
-        G = np.array(G)
+        # G = np.array(G)
         return G
 
     bytes = inds / 4 + (inds % 4 and 1 or 0)
-- 
cgit v1.2.3


From 60ef29c2b003f9d9a76d901ca4107b8cce3b9799 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 13:54:24 +0300
Subject: Now we can use print in lamdba

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index d374407e..173bae2e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -16,6 +16,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from __future__ import print_function
 from optparse import OptionParser
 import sys
 import os
@@ -144,10 +145,10 @@ if options.geno:
     snps = plink.readbim(options.geno+'.bim')
     msg("Converting geno "+options.geno+'.bed')
 
-    def out(i,x):
-        print i,x
+    # def out(i,x):
+    #     print i,x
         
-    snps = plink.readbed(options.geno+'.bed',num_inds, out)
+    snps = plink.readbed(options.geno+'.bed',num_inds, lambda i,x: print(i,x))
     msg(str(count)+" geno lines written (with "+str(snps)+" snps)")
    
 msg("Converting done")
-- 
cgit v1.2.3


From d924bc5842d2813aad8359b9cb614c2a87df1ee9 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 14:37:04 +0300
Subject: plink: missing values in bim

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 27 +++++++++++++++++++--------
 wqflask/wqflask/my_pylmm/pyLMM/plink.py      |  2 ++
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 173bae2e..d04f9b94 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -136,19 +136,30 @@ if options.pheno:
     msg(str(count)+" pheno lines written")
 
 if options.geno:
+    msg("Converting geno "+options.geno+'.bed')
     if not options.plink:
         raise Exception("Use --plink switch")
     if not num_inds:
         raise Exception("Can not figure out the number of individuals, use --pheno or --kinship")    
-    # msg("Converting geno "+options.geno)
-
-    snps = plink.readbim(options.geno+'.bim')
-    msg("Converting geno "+options.geno+'.bed')
-
-    # def out(i,x):
-    #     print i,x
+    num_snps = plink.readbim(options.geno+'.bim')
+    writer = None
+    if options.prefix:
+        writer = open(options.prefix+".geno","w")
+    wrln("# Genotype format version 1.0")
+    wrln("# Individuals = "+str(num_inds))
+    wrln("# Phenotypes = "+str(len(num_snps)))
+
+    def out(i,x):
+        # print(i,x)
+        pass
         
-    snps = plink.readbed(options.geno+'.bed',num_inds, lambda i,x: print(i,x))
+    snps = plink.readbed(options.geno+'.bed',num_inds, out)
+    # for i in range(num_snps):
+    #     wr("\t"+str(i+1))
+    #     wr("\n")
+    # for i in range(count):
+    #     wr("\t".join(phenos[i]))
+    #     wr("\n")
     msg(str(count)+" geno lines written (with "+str(snps)+" snps)")
    
 msg("Converting done")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
index 75d51c3b..4ad2c5f7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
@@ -47,6 +47,8 @@ def readbim(fn):
         list = line.split()
         if len([True for e in list if e == 'nan']) == 0:
           res.append( (list[0],list[1],int(list[2]),int(list[3]),int(list[4]),int(list[5])) )
+        else:
+          res.append( (list[0],list[1],list[2],float('nan'),float('nan'),float('nan')) )
     return res
 
 # .bed (PLINK binary biallelic genotype table)
-- 
cgit v1.2.3


From f92657b08cb2dab8b24c69dee525f52c125713a3 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 15:34:16 +0300
Subject: Write genotype file (start)

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index d04f9b94..7376a1d5 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -150,8 +150,10 @@ if options.geno:
     wrln("# Phenotypes = "+str(len(num_snps)))
 
     def out(i,x):
-        # print(i,x)
-        pass
+        wr(str(i)+"\t")
+        for v in x:
+          wr("\t".join(str(v)))
+        wr("\n")
         
     snps = plink.readbed(options.geno+'.bed',num_inds, out)
     # for i in range(num_snps):
-- 
cgit v1.2.3


From 02cb9710a83bc81abe198cb089dc5a70baf1b657 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 15:37:39 +0300
Subject: Write genotype file (start)

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 7376a1d5..a3728bd2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -151,8 +151,8 @@ if options.geno:
 
     def out(i,x):
         wr(str(i)+"\t")
-        for v in x:
-          wr("\t".join(str(v)))
+        lst = [str(v) for v in x]
+        wr("\t".join(lst))
         wr("\n")
         
     snps = plink.readbed(options.geno+'.bed',num_inds, out)
-- 
cgit v1.2.3


From 749cf072da1849d926ab4d74e288ddf582e84c5a Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 16:47:33 +0300
Subject: Write genotype file non-transposed

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 12 ++++++------
 wqflask/wqflask/my_pylmm/pyLMM/plink.py      | 19 ++++++++++---------
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index a3728bd2..f35cc47c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -148,17 +148,17 @@ if options.geno:
     wrln("# Genotype format version 1.0")
     wrln("# Individuals = "+str(num_inds))
     wrln("# Phenotypes = "+str(len(num_snps)))
+    for i in range(len(num_snps)):
+        wr("\t"+str(i+1))
+        wr("\n")
 
+    m = []
     def out(i,x):
         wr(str(i)+"\t")
-        lst = [str(v) for v in x]
-        wr("\t".join(lst))
+        wr("\t".join(x))
         wr("\n")
         
-    snps = plink.readbed(options.geno+'.bed',num_inds, out)
-    # for i in range(num_snps):
-    #     wr("\t"+str(i+1))
-    #     wr("\n")
+    snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','NA'), out)
     # for i in range(count):
     #     wr("\t".join(phenos[i]))
     #     wr("\n")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
index 4ad2c5f7..7bd2df91 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
@@ -66,25 +66,26 @@ def readbim(fn):
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam
 # http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim
 
-def readbed(fn,inds,func=None):
+def readbed(fn,inds,encoding,func=None):
 
     # For every SNP block fetch the individual genotypes using values
     # 0.0 and 1.0 for homozygous and 0.5 for heterozygous alleles
     def fetchGenotypes(X):
-        D = { \
-              '00': 0.0, \
-              '10': 0.5, \
-              '11': 1.0, \
-              '01': float('nan') \
-           }
-  
+        # D = { \
+        #      '00': 0.0, \
+        #      '10': 0.5, \
+        #      '11': 1.0, \
+        #      '01': float('nan') \
+        #   }
+
+        Didx = { '00': 0, '10': 1, '11': 2, '01': 3 }
         G = []
         for x in X:
             if not len(x) == 10:
                 xx = x[2:]
                 x = '0b' + '0'*(8 - len(xx)) + xx
             a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) 
-            L = [D[y] for y in [a,b,c,d]]
+            L = [encoding[Didx[y]] for y in [a,b,c,d]]
             G += L
         G = G[:inds]
         # G = np.array(G)
-- 
cgit v1.2.3


From f7668a8b9fe552b9d908e2c68367e660d3a81482 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 9 Mar 2015 17:14:19 +0300
Subject: Add HAB encoding

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 24 +++++++++++++++---------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py     | 20 +++++++++++++++++++-
 2 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index f35cc47c..8a1f03ad 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -148,20 +148,26 @@ if options.geno:
     wrln("# Genotype format version 1.0")
     wrln("# Individuals = "+str(num_inds))
     wrln("# Phenotypes = "+str(len(num_snps)))
-    for i in range(len(num_snps)):
+    for i in range(num_inds):
         wr("\t"+str(i+1))
-        wr("\n")
+    wr("\n")
 
     m = []
     def out(i,x):
-        wr(str(i)+"\t")
-        wr("\t".join(x))
-        wr("\n")
+        # wr(str(i)+"\t")
+        # wr("\t".join(x))
+        # wr("\n")
+        m.append(x)
         
-    snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','NA'), out)
-    # for i in range(count):
-    #     wr("\t".join(phenos[i]))
-    #     wr("\n")
+    snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','-'), out)
+
+    msg("Write transposed genotype matrix")
+    for g in range(len(num_snps)):
+        wr(str(g+1)+"\t")
+        for i in range(num_inds):
+            wr(m[g][i])
+        wr("\n")
+            
     msg(str(count)+" geno lines written (with "+str(snps)+" snps)")
    
 msg("Converting done")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 27afd6f4..4398926f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -44,6 +44,8 @@ parser.add_option("--kinship",dest="kinship",
                   help="Kinship file format 1.0")
 parser.add_option("--pheno",dest="pheno",
                   help="Phenotype file format 1.0")
+parser.add_option("--geno",dest="geno",
+                  help="Genotype file format 1.0")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
@@ -84,4 +86,20 @@ if options.pheno:
             Y1.append(ns) # <--- slow
     Y = np.array(Y1)
 
-print Y
+if options.geno:
+    G1 = []
+    print options.geno
+    with open(options.geno,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            print(row)
+            # ns = np.genfromtxt(row[1:])
+            G1.append(ns) # <--- slow
+    G = np.array(G1)
+
+print G
-- 
cgit v1.2.3


From 5528b84927e8e27ad4c13621272bb4bee4a9d694 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 11:17:31 +0300
Subject: Turn HAB encoding into pylmm genotyping

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py |  3 ++-
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py     | 15 ++++++++++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 8a1f03ad..89c09b1e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -147,7 +147,8 @@ if options.geno:
         writer = open(options.prefix+".geno","w")
     wrln("# Genotype format version 1.0")
     wrln("# Individuals = "+str(num_inds))
-    wrln("# Phenotypes = "+str(len(num_snps)))
+    wrln("# SNPs = "+str(len(num_snps)))
+    wrln("# Encoding = HAB")
     for i in range(num_inds):
         wr("\t"+str(i+1))
     wr("\n")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 4398926f..ce8e32be 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -28,7 +28,8 @@ import csv
 usage = """
 python runlmm.py [options] command
 
-  runlmm.py processing multiplexer reads standard input types and calls the routines
+  runlmm.py processing multiplexer reads standardised input formats
+  and calls the different routines
 
   Current commands are:
 
@@ -88,6 +89,9 @@ if options.pheno:
 
 if options.geno:
     G1 = []
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
     print options.geno
     with open(options.geno,'r') as tsvin:
         assert(tsvin.readline().strip() == "# Genotype format version 1.0")
@@ -97,9 +101,14 @@ if options.geno:
         tsvin.readline()
         tsv = csv.reader(tsvin, delimiter='\t')
         for row in tsv:
-            print(row)
+            # print(row)
+            id = row[0]
+            gs = list(row[1])
+            # print id,gs
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            # print id,gs2
             # ns = np.genfromtxt(row[1:])
-            G1.append(ns) # <--- slow
+            G1.append(gs2) # <--- slow
     G = np.array(G1)
 
 print G
-- 
cgit v1.2.3


From 5489f84854c6197d4cf532238de8d90b3d6fcb5e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 11:38:35 +0300
Subject: convertlmm.py: add SNP names

---
 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 58 +++++++++++++++-------------
 1 file changed, 31 insertions(+), 27 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 89c09b1e..3b6b5d70 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -61,6 +61,8 @@ option_parser.add_option("-v", "--verbose",
 
 writer = None
 num_inds = None
+snp_names = []
+ind_names = []
 
 def msg(s):
     sys.stderr.write("INFO: ")
@@ -77,29 +79,6 @@ def wrln(s):
     wr(s)
     wr("\n")
             
-if options.kinship:
-    is_header = True
-    count = 0
-    msg("Converting kinship "+options.kinship)
-    writer = None
-    if options.prefix:
-        writer = open(options.prefix+".kin","w")
-    for line in open(options.kinship,'r'):
-        count += 1
-        if is_header:
-            size = len(line.split())
-            wrln("# Kinship format version 1.0")
-            wrln("# Size="+str(size))
-            for i in range(size):
-                wr("\t"+str(i+1))
-            wr("\n")
-            is_header = False
-        wr(str(count))
-        wr("\t")
-        wr("\t".join(line.split()))
-        wr("\n")
-    num_inds = count
-    msg(str(count)+" kinship lines written")
 
 if options.pheno:
     if not options.plink:
@@ -135,19 +114,44 @@ if options.pheno:
     num_inds = count
     msg(str(count)+" pheno lines written")
 
+if options.kinship:
+    is_header = True
+    count = 0
+    msg("Converting kinship "+options.kinship)
+    writer = None
+    if options.prefix:
+        writer = open(options.prefix+".kin","w")
+    for line in open(options.kinship,'r'):
+        count += 1
+        if is_header:
+            size = len(line.split())
+            wrln("# Kinship format version 1.0")
+            wrln("# Size="+str(size))
+            for i in range(size):
+                wr("\t"+str(i+1))
+            wr("\n")
+            is_header = False
+        wr(str(count))
+        wr("\t")
+        wr("\t".join(line.split()))
+        wr("\n")
+    num_inds = count
+    msg(str(count)+" kinship lines written")
+    
 if options.geno:
     msg("Converting geno "+options.geno+'.bed')
     if not options.plink:
         raise Exception("Use --plink switch")
     if not num_inds:
         raise Exception("Can not figure out the number of individuals, use --pheno or --kinship")    
-    num_snps = plink.readbim(options.geno+'.bim')
+    bim_snps = plink.readbim(options.geno+'.bim')
+    num_snps = len(bim_snps)
     writer = None
     if options.prefix:
         writer = open(options.prefix+".geno","w")
     wrln("# Genotype format version 1.0")
     wrln("# Individuals = "+str(num_inds))
-    wrln("# SNPs = "+str(len(num_snps)))
+    wrln("# SNPs = "+str(num_snps))
     wrln("# Encoding = HAB")
     for i in range(num_inds):
         wr("\t"+str(i+1))
@@ -163,8 +167,8 @@ if options.geno:
     snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','-'), out)
 
     msg("Write transposed genotype matrix")
-    for g in range(len(num_snps)):
-        wr(str(g+1)+"\t")
+    for g in range(num_snps):
+        wr(bim_snps[g][1]+"\t")
         for i in range(num_inds):
             wr(m[g][i])
         wr("\n")
-- 
cgit v1.2.3


From 7d566c7bed8887dc18c1311ee84d22a8dc520889 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 12:00:17 +0300
Subject: Factoring out tsvreaders

---
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py    | 58 ++---------------------
 wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 73 +++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 53 deletions(-)
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ce8e32be..f5ecddb9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -18,12 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from optparse import OptionParser
-import sys
-import os
-import numpy as np
-# from lmm import LMM, run_other
-import csv
-
+import tsvreader
 
 usage = """
 python runlmm.py [options] command
@@ -38,6 +33,7 @@ python runlmm.py [options] command
   try --help for more information
 """
 
+
 parser = OptionParser(usage=usage)
 # parser.add_option("-f", "--file", dest="input file",
 #                   help="In", metavar="FILE")
@@ -61,54 +57,10 @@ cmd = args[0]
 print "Command: ",cmd
 
 if options.kinship:
-    K1 = []
-    print options.kinship
-    with open(options.kinship,'r') as tsvin:
-        assert(tsvin.readline().strip() == "# Kinship format version 1.0")
-        tsvin.readline()
-        tsvin.readline()
-        tsv = csv.reader(tsvin, delimiter='\t')
-        for row in tsv:
-            ns = np.genfromtxt(row[1:])
-            K1.append(ns) # <--- slow
-    K = np.array(K1)
+    k = tsvreader.kinship()
 
 if options.pheno:
-    Y1 = []
-    print options.pheno
-    with open(options.pheno,'r') as tsvin:
-        assert(tsvin.readline().strip() == "# Phenotype format version 1.0")
-        tsvin.readline()
-        tsvin.readline()
-        tsvin.readline()
-        tsv = csv.reader(tsvin, delimiter='\t')
-        for row in tsv:
-            ns = np.genfromtxt(row[1:])
-            Y1.append(ns) # <--- slow
-    Y = np.array(Y1)
+    y = tsvreader.pheno()
 
 if options.geno:
-    G1 = []
-    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
-    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
-
-    print options.geno
-    with open(options.geno,'r') as tsvin:
-        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
-        tsvin.readline()
-        tsvin.readline()
-        tsvin.readline()
-        tsvin.readline()
-        tsv = csv.reader(tsvin, delimiter='\t')
-        for row in tsv:
-            # print(row)
-            id = row[0]
-            gs = list(row[1])
-            # print id,gs
-            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
-            # print id,gs2
-            # ns = np.genfromtxt(row[1:])
-            G1.append(gs2) # <--- slow
-    G = np.array(G1)
-
-print G
+    g = tsvreader.geno()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
new file mode 100644
index 00000000..7cea976e
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -0,0 +1,73 @@
+# Standard file readers
+#
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import os
+import numpy as np
+import csv
+
+def kinship():
+    K1 = []
+    print options.kinship
+    with open(options.kinship,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Kinship format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            ns = np.genfromtxt(row[1:])
+            K1.append(ns) # <--- slow
+    K = np.array(K1)
+
+def pheno():
+    Y1 = []
+    print options.pheno
+    with open(options.pheno,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Phenotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            ns = np.genfromtxt(row[1:])
+            Y1.append(ns) # <--- slow
+    Y = np.array(Y1)
+
+def geno():
+    G1 = []
+    hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+    pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+    print options.geno
+    with open(options.geno,'r') as tsvin:
+        assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsvin.readline()
+        tsv = csv.reader(tsvin, delimiter='\t')
+        for row in tsv:
+            # print(row)
+            id = row[0]
+            gs = list(row[1])
+            # print id,gs
+            gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+            # print id,gs2
+            # ns = np.genfromtxt(row[1:])
+            G1.append(gs2) # <--- slow
+    G = np.array(G1)
+
-- 
cgit v1.2.3


From 926225cf75991142e82d95b2505c59446fa1a8c4 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 12:05:50 +0300
Subject: Refactored tsvreader out

---
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py    |  9 ++++++---
 wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 21 ++++++++++++---------
 2 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index f5ecddb9..bed2e39e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -57,10 +57,13 @@ cmd = args[0]
 print "Command: ",cmd
 
 if options.kinship:
-    k = tsvreader.kinship()
+    k = tsvreader.kinship(options.kinship)
+    print len(k)
 
 if options.pheno:
-    y = tsvreader.pheno()
+    y = tsvreader.pheno(options.pheno)
+    print len(y)
 
 if options.geno:
-    g = tsvreader.geno()
+    g = tsvreader.geno(options.geno)
+    print len(g)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index 7cea976e..b4027fa3 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -20,10 +20,10 @@ import os
 import numpy as np
 import csv
 
-def kinship():
+def kinship(fn):
     K1 = []
-    print options.kinship
-    with open(options.kinship,'r') as tsvin:
+    print fn
+    with open(fn,'r') as tsvin:
         assert(tsvin.readline().strip() == "# Kinship format version 1.0")
         tsvin.readline()
         tsvin.readline()
@@ -32,11 +32,12 @@ def kinship():
             ns = np.genfromtxt(row[1:])
             K1.append(ns) # <--- slow
     K = np.array(K1)
+    return K
 
-def pheno():
+def pheno(fn):
     Y1 = []
-    print options.pheno
-    with open(options.pheno,'r') as tsvin:
+    print fn
+    with open(fn,'r') as tsvin:
         assert(tsvin.readline().strip() == "# Phenotype format version 1.0")
         tsvin.readline()
         tsvin.readline()
@@ -46,14 +47,15 @@ def pheno():
             ns = np.genfromtxt(row[1:])
             Y1.append(ns) # <--- slow
     Y = np.array(Y1)
+    return Y
 
-def geno():
+def geno(fn):
     G1 = []
     hab_mapper = {'A':0,'H':1,'B':2,'-':3}
     pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
 
-    print options.geno
-    with open(options.geno,'r') as tsvin:
+    print fn
+    with open(fn,'r') as tsvin:
         assert(tsvin.readline().strip() == "# Genotype format version 1.0")
         tsvin.readline()
         tsvin.readline()
@@ -70,4 +72,5 @@ def geno():
             # ns = np.genfromtxt(row[1:])
             G1.append(gs2) # <--- slow
     G = np.array(G1)
+    return G
 
-- 
cgit v1.2.3


From 4902e2a8f4a4c9e08272dbd752eeb61f0e6b60f2 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 12:47:27 +0300
Subject: Populationg Redis from TSV data

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py    | 49 ++++++++++++++++++++++----------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 11 +++++--
 2 files changed, 42 insertions(+), 18 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 3cbab149..8c6e30e8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -719,23 +719,11 @@ class LMM:
        pl.ylabel("Probability of data")
        pl.title(title)
 
-# This is the main function used by Genenetwork2 (with environment)
-def gn2_main():
-    parser = argparse.ArgumentParser(description='Run pyLMM')
-    parser.add_argument('-k', '--key')
-    parser.add_argument('-s', '--species')
-    
-    opts = parser.parse_args()
-    
-    key = opts.key
-    species = opts.species
-    
+
+def gn2_redis(key,species):
     json_params = Redis.get(key)
     
     params = json.loads(json_params)
-    #print("params:", params)
-    
-    #print("kinship_matrix:", params['kinship_matrix'])
     
     tempdata = temp_data.TempData(params['temp_uuid'])
     if species == "human" :
@@ -760,7 +748,38 @@ def gn2_main():
     #Pushing json_results into a list where it is the only item because blpop needs a list
     Redis.rpush(results_key, json_results)
     Redis.expire(results_key, 60*60)
-        
+
+# This is the main function used by Genenetwork2 (with environment)
+def gn2_main():
+    parser = argparse.ArgumentParser(description='Run pyLMM')
+    parser.add_argument('-k', '--key')
+    parser.add_argument('-s', '--species')
+    
+    opts = parser.parse_args()
+    
+    key = opts.key
+    species = opts.species
+
+    gn2_redis(key,species)
+
+def gn2_load_redis(key,species,kinship,pheno,geno):
+    print("Loading Redis from parsed data")
+    params = dict(pheno_vector = pheno.tolist(),
+                  genotype_matrix = geno.tolist(),
+                  restricted_max_likelihood = True,
+                  refit = False,
+                  temp_uuid = "testrun_temp_uuid",
+                        
+                  # meta data
+                  timestamp = datetime.datetime.now().isoformat(),
+    )
+            
+    json_params = json.dumps(params)
+    Redis.set(key, json_params)
+    Redis.expire(key, 60*60)
+
+    gn2_redis(key,species)
+    
 if __name__ == '__main__':
     print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
     if has_gn2:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index bed2e39e..525c43fc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -19,6 +19,7 @@
 
 from optparse import OptionParser
 import tsvreader
+from lmm import gn2_load_redis
 
 usage = """
 python runlmm.py [options] command
@@ -29,6 +30,7 @@ python runlmm.py [options] command
   Current commands are:
 
     parse        : only parse input files
+    redis        : use Redis to call into GN2
 
   try --help for more information
 """
@@ -58,12 +60,15 @@ print "Command: ",cmd
 
 if options.kinship:
     k = tsvreader.kinship(options.kinship)
-    print len(k)
+    print k.shape
 
 if options.pheno:
     y = tsvreader.pheno(options.pheno)
-    print len(y)
+    print y.shape
 
 if options.geno:
     g = tsvreader.geno(options.geno)
-    print len(g)
+    print g.shape
+
+if cmd == 'redis':
+    gn2_load_redis('testrun','other',k,y,g.T)
-- 
cgit v1.2.3


From d6a1f63d08645ff680dad9880030f58881a127d9 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 15:15:48 +0300
Subject: Missing genotypes and normalize

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py    |  9 ++++++++-
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 25 ++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8c6e30e8..00bbf144 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -726,6 +726,11 @@ def gn2_redis(key,species):
     params = json.loads(json_params)
     
     tempdata = temp_data.TempData(params['temp_uuid'])
+
+    print('kinship', np.array(params['kinship_matrix'][0:10][0:10]))
+    print('pheno', params['pheno_vector'][0:10])
+    print('geno', params['genotype_matrix'][0:10][0:10])
+    
     if species == "human" :
         ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
                   covariate_matrix = np.array(params['covariate_matrix']),
@@ -748,6 +753,7 @@ def gn2_redis(key,species):
     #Pushing json_results into a list where it is the only item because blpop needs a list
     Redis.rpush(results_key, json_results)
     Redis.expire(results_key, 60*60)
+    return ps, ts
 
 # This is the main function used by Genenetwork2 (with environment)
 def gn2_main():
@@ -766,6 +772,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno):
     print("Loading Redis from parsed data")
     params = dict(pheno_vector = pheno.tolist(),
                   genotype_matrix = geno.tolist(),
+                  kinship_matrix= kinship.tolist(),
                   restricted_max_likelihood = True,
                   refit = False,
                   temp_uuid = "testrun_temp_uuid",
@@ -778,7 +785,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno):
     Redis.set(key, json_params)
     Redis.expire(key, 60*60)
 
-    gn2_redis(key,species)
+    return gn2_redis(key,species)
     
 if __name__ == '__main__':
     print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 525c43fc..6db1bdbc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -18,7 +18,9 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from optparse import OptionParser
+import sys
 import tsvreader
+import numpy as np
 from lmm import gn2_load_redis
 
 usage = """
@@ -70,5 +72,26 @@ if options.geno:
     g = tsvreader.geno(options.geno)
     print g.shape
 
+def normalizeGenotype(G):
+    x = True - np.isnan(G)
+    m = G[x].mean()
+    s = np.sqrt(G[x].var())
+    G[np.isnan(G)] = m
+    if s == 0: G = G - m
+    else: G = (G - m) / s
+    return G
+
+gT = normalizeGenotype(g.T)
+
+# Remove individuals with missing phenotypes
+v = np.isnan(y)
+keep = True - v
+if v.sum():
+   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
+   y  = y[keep]
+   gT = gT[keep,:]
+   k = k[keep,:][:,keep]
+
 if cmd == 'redis':
-    gn2_load_redis('testrun','other',k,y,g.T)
+    ps, ts = gn2_load_redis('testrun','other',k,y,gT)
+    print ps[0:10],ps[-10:]
-- 
cgit v1.2.3


From f116f59923ebe3fc52c27fac8fd571a70e323a68 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 15:25:00 +0300
Subject: lmm.py: debugging output

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 00bbf144..c2271611 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -727,9 +727,9 @@ def gn2_redis(key,species):
     
     tempdata = temp_data.TempData(params['temp_uuid'])
 
-    print('kinship', np.array(params['kinship_matrix'][0:10][0:10]))
-    print('pheno', params['pheno_vector'][0:10])
-    print('geno', params['genotype_matrix'][0:10][0:10])
+    print('kinship', np.array(params['kinship_matrix']))
+    print('pheno', np.array(params['pheno_vector']))
+    print('geno', np.array(params['genotype_matrix']))
     
     if species == "human" :
         ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
-- 
cgit v1.2.3


From 6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 10 Mar 2015 17:18:26 +0300
Subject: Normalisation

---
 wqflask/wqflask/my_pylmm/pyLMM/input.py  |  4 +++-
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py    |  5 +++--
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 38 +++++++++++++++++++++++---------
 3 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index fdf02e42..f7b556a5 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -133,13 +133,15 @@ class plink:
         return G
     
     def normalizeGenotype(self,G):
+        # print "Before",G
+        # print G.shape
         x = True - np.isnan(G)
         m = G[x].mean()
         s = np.sqrt(G[x].var())
         G[np.isnan(G)] = m
         if s == 0: G = G - m
         else: G = (G - m) / s
-     
+        # print "After",G
         return G
     
     def getPhenos(self,phenoFile=None):
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index c2271611..ab19bf08 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -333,8 +333,8 @@ def calculate_kinship(genotype_matrix, temp_data, is_testing=False):
     """
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    print("genotype matrix n is:", n)
-    print("genotype matrix m is:", m)
+    print("genotype 2D matrix n (inds) is:", n)
+    print("genotype 2D matrix m (snps) is:", m)
     keep = []
     for counter in range(m):
         if is_testing and counter>8:
@@ -730,6 +730,7 @@ def gn2_redis(key,species):
     print('kinship', np.array(params['kinship_matrix']))
     print('pheno', np.array(params['pheno_vector']))
     print('geno', np.array(params['genotype_matrix']))
+    # sys.exit(1)
     
     if species == "human" :
         ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 6db1bdbc..907a6835 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -73,15 +73,33 @@ if options.geno:
     print g.shape
 
 def normalizeGenotype(G):
-    x = True - np.isnan(G)
-    m = G[x].mean()
-    s = np.sqrt(G[x].var())
-    G[np.isnan(G)] = m
-    if s == 0: G = G - m
-    else: G = (G - m) / s
+    # Run for every SNP list (num individuals)
+    x = True - np.isnan(G)  # Matrix of True/False
+    m = G[x].mean()         # Global mean value
+    print m
+    s = np.sqrt(G[x].var()) # Global stddev
+    print s
+    G[np.isnan(G)] = m      # Plug-in mean values for missing
+    if s == 0:
+        G = G - m           # Subtract the mean
+    else:
+        G = (G - m) / s     # Normalize the deviation
     return G
 
-gT = normalizeGenotype(g.T)
+# g = g.reshape((1, -1))[0]
+print("Before",g)
+gn = []
+for snp in g:
+    gn.append( normalizeGenotype(snp) )
+
+gn = np.array(gn)
+print("After1",gn)
+gnT = gn.T
+print("After",gnT)
+# G = gnT
+G = gnT
+print "G shape",G.shape
+# assert(G[0,0]==-2.25726341)
 
 # Remove individuals with missing phenotypes
 v = np.isnan(y)
@@ -89,9 +107,9 @@ keep = True - v
 if v.sum():
    if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
    y  = y[keep]
-   gT = gT[keep,:]
+   G = G[keep,:]
    k = k[keep,:][:,keep]
 
 if cmd == 'redis':
-    ps, ts = gn2_load_redis('testrun','other',k,y,gT)
-    print ps[0:10],ps[-10:]
+    ps, ts = gn2_load_redis('testrun','other',k,y,G)
+    print np.array(ps)
-- 
cgit v1.2.3


From a5d0d42f6977c4473c28f12999a62ad360750041 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 11 Mar 2015 12:07:54 +0300
Subject: & Added testing option, now the results match

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py    | 16 ++++++++++------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 11 ++++++++---
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index ab19bf08..5ffb9a4c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -267,6 +267,7 @@ def run_other(pheno_vector,
     """
     
     print("In run_other")
+    print("REML=",restricted_max_likelihood," REFIT=",refit, "TESTING=",is_testing)
     with Bench("Calculate Kinship"):
         kinship_matrix = calculate_kinship(genotype_matrix, tempdata, is_testing)
     
@@ -280,6 +281,7 @@ def run_other(pheno_vector,
         lmm_ob.fit()
 
     print("genotype_matrix: ", genotype_matrix.shape)
+    print(genotype_matrix)
 
     with Bench("Doing GWAS"):
         t_stats, p_values = GWAS(pheno_vector,
@@ -720,7 +722,7 @@ class LMM:
        pl.title(title)
 
 
-def gn2_redis(key,species):
+def gn2_redis(key,species,is_testing=False):
     json_params = Redis.get(key)
     
     params = json.loads(json_params)
@@ -729,7 +731,8 @@ def gn2_redis(key,species):
 
     print('kinship', np.array(params['kinship_matrix']))
     print('pheno', np.array(params['pheno_vector']))
-    print('geno', np.array(params['genotype_matrix']))
+    geno = np.array(params['genotype_matrix'])
+    print('geno', geno.shape, geno)
     # sys.exit(1)
     
     if species == "human" :
@@ -741,10 +744,11 @@ def gn2_redis(key,species):
                   tempdata = tempdata)
     else:
         ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']),
-                  genotype_matrix = np.array(params['genotype_matrix']),
+                  genotype_matrix = geno,
                   restricted_max_likelihood = params['restricted_max_likelihood'],
                   refit = params['refit'],
-                  tempdata = tempdata, is_testing=False)
+                  tempdata = tempdata,
+                  is_testing=is_testing)
         
     results_key = "pylmm:results:" + params['temp_uuid']
 
@@ -769,7 +773,7 @@ def gn2_main():
 
     gn2_redis(key,species)
 
-def gn2_load_redis(key,species,kinship,pheno,geno):
+def gn2_load_redis(key,species,kinship,pheno,geno,is_testing):
     print("Loading Redis from parsed data")
     params = dict(pheno_vector = pheno.tolist(),
                   genotype_matrix = geno.tolist(),
@@ -786,7 +790,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno):
     Redis.set(key, json_params)
     Redis.expire(key, 60*60)
 
-    return gn2_redis(key,species)
+    return gn2_redis(key,species,is_testing)
     
 if __name__ == '__main__':
     print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 907a6835..738268be 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -50,6 +50,9 @@ parser.add_option("--geno",dest="geno",
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
+parser.add_option("--test",
+                  action="store_true", dest="testing", default=False,
+                  help="Testing mode")
 
 (options, args) = parser.parse_args()
 
@@ -76,9 +79,9 @@ def normalizeGenotype(G):
     # Run for every SNP list (num individuals)
     x = True - np.isnan(G)  # Matrix of True/False
     m = G[x].mean()         # Global mean value
-    print m
+    # print m
     s = np.sqrt(G[x].var()) # Global stddev
-    print s
+    # print s
     G[np.isnan(G)] = m      # Plug-in mean values for missing
     if s == 0:
         G = G - m           # Subtract the mean
@@ -92,6 +95,7 @@ gn = []
 for snp in g:
     gn.append( normalizeGenotype(snp) )
 
+gn = g
 gn = np.array(gn)
 print("After1",gn)
 gnT = gn.T
@@ -99,6 +103,7 @@ print("After",gnT)
 # G = gnT
 G = gnT
 print "G shape",G.shape
+# sys.exit(1)
 # assert(G[0,0]==-2.25726341)
 
 # Remove individuals with missing phenotypes
@@ -111,5 +116,5 @@ if v.sum():
    k = k[keep,:][:,keep]
 
 if cmd == 'redis':
-    ps, ts = gn2_load_redis('testrun','other',k,y,G)
+    ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing)
     print np.array(ps)
-- 
cgit v1.2.3


From 084c8fe8c1c3255d59bd1da59b7bebb5c8811ba0 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 11 Mar 2015 12:11:27 +0300
Subject: lmm.py: change message on running from CLI

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'wqflask')

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 5ffb9a4c..58d7593d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -797,7 +797,7 @@ if __name__ == '__main__':
     if has_gn2:
         gn2_main()
     else:
-        print("Run from pylmmGWAS.py instead")
+        print("Run from runlmm.py instead")
 
 
-- 
cgit v1.2.3