From 5ec961a719246d5082814c8eb49ee3f3d7cea11e Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Thu, 18 Dec 2014 16:16:46 +0000
Subject: Committer: Lei Yan <lei@penguin.uthsc.edu> On branch master

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index 9a509baa..b2c841ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 *.bak
 *~
 web/new_genotypes/HSNIH.json
+wqflask/secure_server.py
-- 
cgit 1.4.1


From 4ea8b8a5efc848c99767894cbdbef2f135e3b83c Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Thu, 18 Dec 2014 16:51:47 +0000
Subject: Committer: Lei Yan <lei@penguin.uthsc.edu> On branch master

---
 web/images/edit.png                           | Bin 0 -> 4322 bytes
 wqflask/wqflask/docs.py                       |   1 +
 wqflask/wqflask/templates/docedit.html        |  20 ++++++++++++++++++++
 wqflask/wqflask/templates/docs.html           |   5 +++++
 wqflask/wqflask/templates/testhtmleditor.html |  20 --------------------
 wqflask/wqflask/views.py                      |   7 ++++---
 6 files changed, 30 insertions(+), 23 deletions(-)
 create mode 100644 web/images/edit.png
 create mode 100755 wqflask/wqflask/templates/docedit.html
 delete mode 100755 wqflask/wqflask/templates/testhtmleditor.html

diff --git a/web/images/edit.png b/web/images/edit.png
new file mode 100644
index 00000000..dda50521
Binary files /dev/null and b/web/images/edit.png differ
diff --git a/wqflask/wqflask/docs.py b/wqflask/wqflask/docs.py
index 65255987..07b0b81a 100755
--- a/wqflask/wqflask/docs.py
+++ b/wqflask/wqflask/docs.py
@@ -11,5 +11,6 @@ class Docs(object):
             WHERE Docs.entry LIKE '%s'
             """
         result = g.db.execute(sql % (entry)).fetchone()
+        self.entry = entry
         self.title = result[0]
         self.content = result[1]
diff --git a/wqflask/wqflask/templates/docedit.html b/wqflask/wqflask/templates/docedit.html
new file mode 100755
index 00000000..1a9e8ca8
--- /dev/null
+++ b/wqflask/wqflask/templates/docedit.html
@@ -0,0 +1,20 @@
+{% extends "base.html" %}
+
+{% block title %}Edit: {{title}}{% endblock %}
+
+{% block content %}
+<div class="container">
+    <h3>Edit: {{title}}</h3>
+    <form>
+        <textarea name="ckcontent" id="ckcontent">
+			{{content|safe}}
+        </textarea>
+        <script type="text/javascript" src="/static/packages/ckeditor/ckeditor.js"></script>
+        <script type="text/javascript">
+            CKEDITOR.replace('ckcontent',	{
+                height: '650px',
+            });
+        </script>
+    </form>
+</div>
+{% endblock %}
diff --git a/wqflask/wqflask/templates/docs.html b/wqflask/wqflask/templates/docs.html
index cbaf1e70..08f95721 100755
--- a/wqflask/wqflask/templates/docs.html
+++ b/wqflask/wqflask/templates/docs.html
@@ -5,6 +5,11 @@
 {% block content %}
 <div class="container">
 	<h3>{{title}}</h3>
+	<div style="text-align: right;">
+		<a href="docedit?entry={{entry}}">
+			<img style="width: 25px;" src="/static/images/edit.png">
+		</a>
+	</div>
 	{{content|safe}}
 </div>
 {% endblock %}
diff --git a/wqflask/wqflask/templates/testhtmleditor.html b/wqflask/wqflask/templates/testhtmleditor.html
deleted file mode 100755
index 1d258d0e..00000000
--- a/wqflask/wqflask/templates/testhtmleditor.html
+++ /dev/null
@@ -1,20 +0,0 @@
-{% extends "base.html" %}
-
-{% block title %}Test html editor{% endblock %}
-
-{% block content %}
-<div class="container">
-    <h2>Test HTML Editor</h2>
-    <form>
-        <textarea name="ckcontent" id="ckcontent">
-        Test HTML
-        </textarea>
-        <script type="text/javascript" src="/static/packages/ckeditor/ckeditor.js"></script>
-        <script type="text/javascript">
-            CKEDITOR.replace('ckcontent',	{
-                height: '500px',
-            });
-        </script>
-    </form>
-</div>
-{% endblock %}
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index cdf93147..deb566ba 100755
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -134,9 +134,10 @@ def search_page():
         else:
             return render_template("search_result_page.html", **result)
 
-@app.route("/testhtmleditor")
-def testhtmleditor_page():
-    return render_template("testhtmleditor.html")
+@app.route("/docedit")
+def docedit():
+    doc = docs.Docs(request.args['entry'])
+    return render_template("docedit.html", **doc.__dict__)
 
 @app.route("/help")
 def help():
-- 
cgit 1.4.1


From 723fcb3723d16e4ef476b0fc7471f7a770ed01be Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Fri, 13 Feb 2015 18:18:49 +0000
Subject: Committer: Lei Yan <lei@penguin.uthsc.edu> On branch master

---
 wqflask/wqflask/news.py             | 16 ++++++++++++++++
 wqflask/wqflask/templates/news.html | 21 +++++++++++++++++++++
 wqflask/wqflask/views.py            | 13 ++++---------
 3 files changed, 41 insertions(+), 9 deletions(-)
 create mode 100755 wqflask/wqflask/news.py
 create mode 100755 wqflask/wqflask/templates/news.html

diff --git a/wqflask/wqflask/news.py b/wqflask/wqflask/news.py
new file mode 100755
index 00000000..62dc1bbb
--- /dev/null
+++ b/wqflask/wqflask/news.py
@@ -0,0 +1,16 @@
+from __future__ import absolute_import, print_function, division
+import sys  
+reload(sys)  
+sys.setdefaultencoding('utf8')
+from flask import g
+
+class News(object):
+
+    def __init__(self):
+        sql = """
+            SELECT News.id, News.date, News.details
+            FROM News
+			order by News.date desc
+            """
+        self.title = "GeneNetwork News"
+        self.newslist = g.db.execute(sql).fetchall()
diff --git a/wqflask/wqflask/templates/news.html b/wqflask/wqflask/templates/news.html
new file mode 100755
index 00000000..0a19dcee
--- /dev/null
+++ b/wqflask/wqflask/templates/news.html
@@ -0,0 +1,21 @@
+{% extends "base.html" %}
+
+{% block title %}{{title}}{% endblock %}
+
+{% block content %}
+<div class="container">
+	<h3>{{title}}</h3>
+	<table class="table table-hover table-striped">
+		<tbody>
+		{% for newsitem in newslist %}
+			<tr>
+				<td style="width: 100px;">
+					<span style="font-weight: bold;">{{newsitem.date}}</span>
+				</td>
+				<td>{{newsitem.details|safe}}</td>
+			</tr>
+		{% endfor %}
+		</tbody>
+	</table>
+</div>
+{% endblock %}
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index deb566ba..7c0f4e14 100755
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -30,6 +30,7 @@ from flask import (render_template, request, make_response, Response,
 
 from wqflask import search_results
 from wqflask import docs
+from wqflask import news
 from base.data_set import DataSet    # Used by YAML in marker_regression
 from base.data_set import create_datasets_list
 from wqflask.show_trait import show_trait
@@ -145,15 +146,9 @@ def help():
     return render_template("docs.html", **doc.__dict__)
 
 @app.route("/news")
-def news():
-    #variables = whats_new.whats_new()
-    with open("/home/sam/gene/wqflask/wqflask/yaml_data/whats_new.yaml") as fh:
-        contents = fh.read()
-        yamilized = yaml.safe_load(contents)
-        news_items = yamilized['news']
-    for news_item in news_items:
-        print("\nnews_item is: %s\n" % (news_item))
-    return render_template("whats_new.html", news_items=news_items)
+def news_route():
+    newsobject = news.News()
+    return render_template("news.html", **newsobject.__dict__)
 
 @app.route("/references")
 def references():
-- 
cgit 1.4.1


From 0e21a58cfe7ca1ec6c1d84c43e1d2fd2d0b9104c Mon Sep 17 00:00:00 2001
From: Lei Yan
Date: Fri, 13 Feb 2015 19:54:00 +0000
Subject: Committer: Lei Yan <lei@penguin.uthsc.edu> On branch master

---
 wqflask/wqflask/templates/news.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wqflask/wqflask/templates/news.html b/wqflask/wqflask/templates/news.html
index 0a19dcee..4f0032b8 100755
--- a/wqflask/wqflask/templates/news.html
+++ b/wqflask/wqflask/templates/news.html
@@ -5,11 +5,11 @@
 {% block content %}
 <div class="container">
 	<h3>{{title}}</h3>
-	<table class="table table-hover table-striped">
+	<table class="table table-hover table-striped" style="width: 600px;">
 		<tbody>
 		{% for newsitem in newslist %}
 			<tr>
-				<td style="width: 100px;">
+				<td style="width: 90px; text-wrap:none;" nowrap>
 					<span style="font-weight: bold;">{{newsitem.date}}</span>
 				</td>
 				<td>{{newsitem.details|safe}}</td>
-- 
cgit 1.4.1


From dbc048f06a50837f59ce4d2ee7cfe84ab8e6062f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 15 Mar 2015 14:41:14 +0300
Subject: Comment

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 5aa27106..1fa7a895 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -334,7 +334,7 @@ def calculate_kinship_new(genotype_matrix, temp_data=None):
     inds (columns) by snps (rows).
     """
     G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
-    return kinship(G.T),G
+    return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
 
 def calculate_kinship_old(genotype_matrix, temp_data=None):
     """
-- 
cgit 1.4.1


From 8463812f328973190b5a5160bdcff47a7f1cd12d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 16 Mar 2015 12:41:43 +0300
Subject: Refactoring eigenvalue KvaKve

---
 wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 20 ++++++++++++++++++++
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py     | 13 +++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 28f2042d..bb4f5ace 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -21,10 +21,12 @@
 import sys
 import os
 import numpy as np
+from scipy import linalg
 import multiprocessing as mp # Multiprocessing is part of the Python stdlib
 import Queue
 import time
 
+
 from optmatrix import matrix_initialize, matrixMultT
 
 def kinship_full(G):
@@ -153,5 +155,23 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
    #    np.savetxt(outFile+".kve",Kve)
    return K      
 
+def kvakve(K, verbose=True):
+   """
+   Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
+   of small values < 1e-6 (notably smaller than zero)
+   """
+   if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
+   
+   Kva,Kve = linalg.eigh(K)
+   if verbose:
+      print("self.Kva is: ", Kva.shape, Kva)
+      print("self.Kve is: ", Kve.shape, Kve)
+
+   if sum(Kva < 1e-6):
+      if verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(Kva < 1e-6)))
+      Kva[Kva < 1e-6] = 1e-6
+   return Kva,Kve
+
+
 
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 1fa7a895..17fe952a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -49,7 +49,7 @@ has_gn2=True
 
 from utility.benchmark import Bench
 from utility import temp_data
-from kinship import kinship, kinship_full
+from kinship import kinship, kinship_full, kvakve
 import genotype
 
 try:
@@ -532,11 +532,12 @@ class LMM:
        print("this K is:", K.shape, pf(K))
        
        if len(Kva) == 0 or len(Kve) == 0:
-          if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-          begin = time.time()
-          Kva,Kve = linalg.eigh(K)
-          end = time.time()
-          if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+          # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
+          # begin = time.time()
+          # Kva,Kve = linalg.eigh(K)
+          # end = time.time()
+          # if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+          Kva,Kve = kvakve(K)
 
        self.K = K
        self.Kva = Kva
-- 
cgit 1.4.1


From 3cff3091379a2e8835027c272ada0d8d4314624e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 16 Mar 2015 13:15:21 +0300
Subject: Output added

---
 wqflask/wqflask/my_pylmm/pyLMM/kinship.py |  4 ++--
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py     | 16 +++++++++-------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py  |  1 +
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index bb4f5ace..cd2445f1 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -164,8 +164,8 @@ def kvakve(K, verbose=True):
    
    Kva,Kve = linalg.eigh(K)
    if verbose:
-      print("self.Kva is: ", Kva.shape, Kva)
-      print("self.Kve is: ", Kve.shape, Kve)
+      print("Kva is: ", Kva.shape, Kva)
+      print("Kve is: ", Kve.shape, Kve)
 
    if sum(Kva < 1e-6):
       if verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(Kva < 1e-6)))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 17fe952a..9fd05b42 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -533,11 +533,12 @@ class LMM:
        
        if len(Kva) == 0 or len(Kve) == 0:
           # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-          # begin = time.time()
+          begin = time.time()
           # Kva,Kve = linalg.eigh(K)
-          # end = time.time()
-          # if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
           Kva,Kve = kvakve(K)
+          end = time.time()
+          if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+          print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
 
        self.K = K
        self.Kva = Kva
@@ -547,10 +548,11 @@ class LMM:
        self.Y = Y
        self.X0 = X0
        self.N = self.K.shape[0]
- 
-       if sum(self.Kva < 1e-6):
-          if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0)))
-          self.Kva[self.Kva < 1e-6] = 1e-6
+
+       # ----> Below moved to kinship.kvakve(K)
+       # if sum(self.Kva < 1e-6):
+       #    if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0)))
+       #    self.Kva[self.Kva < 1e-6] = 1e-6
  
        self.transform()
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 6bb79856..cef0cdc4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -120,6 +120,7 @@ if cmd == 'redis':
     G = None
     ps, ts = gn2_load_redis('testrun','other',k,Y,gt)
     print np.array(ps)
+    print sum(ps)
     # Test results
     p1 = round(ps[0],4)
     p2 = round(ps[-1],4)
-- 
cgit 1.4.1


From 62f6db77b22e1fb28b3355d75a30f9ecf6c94d95 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 16 Mar 2015 15:34:38 +0300
Subject: Adding multi-core GWAS

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py    | 213 ++++++++++++++++++++++++++++++
 wqflask/wqflask/my_pylmm/pyLMM/kinship.py |   4 +-
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py     |  62 +++++++--
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py  |   8 +-
 4 files changed, 271 insertions(+), 16 deletions(-)
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gwas.py

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
new file mode 100644
index 00000000..52455014
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -0,0 +1,213 @@
+# pylmm-based GWAS calculation
+#
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#!/usr/bin/python
+
+import pdb
+import time
+import sys
+# from utility import temp_data
+import lmm
+
+
+import os
+import numpy as np
+import input
+from optmatrix import matrix_initialize
+# from lmm import LMM
+
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+def compute_snp(j,snp_ids,q = None):
+   # print(j,len(snp_ids),"\n")
+   result = []
+   for snp_id in snp_ids:
+      # j,snp_id = collect
+      snp,id = snp_id
+      # id = collect[1]
+      # result = []
+      # Check SNPs for missing values
+      x = snp[keep].reshape((n,1))  # all the SNPs
+      v = np.isnan(x).reshape((-1,))
+      if v.sum():
+         # NOTE: this code appears to be unreachable!
+         if verbose:
+            sys.stderr.write("Found missing values in "+str(x))
+         keeps = True - v
+         xs = x[keeps,:]
+         if keeps.sum() <= 1 or xs.var() <= 1e-6: 
+            # PS.append(np.nan)
+            # TS.append(np.nan)
+            # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
+            # continue
+            result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
+            continue
+
+         # Its ok to center the genotype -  I used normalizeGenotype to 
+         # force the removal of missing genotypes as opposed to replacing them with MAF.
+         if not normalizeGenotype:
+            xs = (xs - xs.mean()) / np.sqrt(xs.var())
+         Ys = Y[keeps]
+         X0s = X0[keeps,:]
+         Ks = K[keeps,:][:,keeps]
+         if kfile2:
+            K2s = K2[keeps,:][:,keeps]
+            Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=verbose,K2=K2s)
+         else:
+            Ls = LMM(Ys,Ks,X0=X0s,verbose=verbose)
+         if refit:
+           Ls.fit(X=xs,REML=REML)
+         else:
+            #try:
+            Ls.fit(REML=REML)
+            #except: pdb.set_trace()
+         ts,ps,beta,betaVar = Ls.association(xs,REML=REML,returnBeta=True)
+      else:
+         if x.var() == 0:
+            # Note: this code appears to be unreachable!
+
+            # PS.append(np.nan)
+            # TS.append(np.nan)
+            # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
+            result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
+            continue
+
+         if refit:
+            L.fit(X=x,REML=REML)
+         # This is where it happens
+         ts,ps,beta,betaVar = L.association(x,REML=REML,returnBeta=True)
+      result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps))
+      # compute_snp.q.put([j,formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)])
+   # print [j,result[0]]," in result queue\n"
+   if not q:
+      q = compute_snp.q
+   q.put([j,result])
+   return j
+      # PS.append(ps)
+      # TS.append(ts)
+      # return len(result)
+      # compute.q.put(result)
+      # return None
+
+def f_init(q):
+   compute_snp.q = q
+
+def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
+   """
+   Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows)
+   """
+   matrix_initialize()
+   cpu_num = mp.cpu_count()
+   numThreads = 1
+   kfile2 = False
+
+   sys.stderr.write(str(G.shape)+"\n")
+   n = G.shape[1] # inds
+   inds = n
+   m = G.shape[0] # snps
+   snps = m
+   sys.stderr.write(str(m)+" SNPs\n")
+   # print "***** GWAS: G",G.shape,G
+   assert m>n, "n should be larger than m (snps>inds)"
+
+   # CREATE LMM object for association
+   # if not kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
+   # else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=verbose,K2=K2)
+
+   runlmm = lmm.LMM(Y,K) # ,Kva,Kve,X0,verbose=verbose)
+   if not refit:
+      if verbose: sys.stderr.write("Computing fit for null model\n")
+      runlmm.fit()  # follow GN model in run_other
+      if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (runlmm.optH,runlmm.optSigma))
+      
+   # outFile = "test.out"
+   # out = open(outFile,'w')
+   out = sys.stderr
+
+   def outputResult(id,beta,betaSD,ts,ps):
+      out.write(formatResult(id,beta,betaSD,ts,ps))
+   def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
+   def formatResult(id,beta,betaSD,ts,ps):
+      return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
+
+   printOutHead()
+
+   # Set up the pool
+   # mp.set_start_method('spawn')
+   q = mp.Queue()
+   p = mp.Pool(numThreads, f_init, [q])
+   collect = []
+
+   # Buffers for pvalues and t-stats
+   # PS = []
+   # TS = []
+   count = 0
+
+   completed = 0
+   last_j = 0
+   # for snp_id in G:
+   for snp in G:
+      print snp.shape,snp
+      snp_id = ('SNPID',snp)
+      count += 1
+      if count % 1000 == 0:
+         job = count/1000
+         if verbose:
+            sys.stderr.write("Job %d At SNP %d\n" % (job,count))
+         if numThreads == 1:
+            compute_snp(job,collect,q)
+            collect = []
+            j,lines = q.get()
+            if verbose:
+               sys.stderr.write("Job "+str(j)+" finished\n")
+            for line in lines:
+               out.write(line)
+         else:
+            p.apply_async(compute_snp,(job,collect))
+            collect = []
+            while job > completed:
+               try:
+                  j,lines = q.get_nowait()
+                  if verbose:
+                     sys.stderr.write("Job "+str(j)+" finished\n")
+                  for line in lines:
+                     out.write(line)
+                  completed += 1
+               except Queue.Empty:
+                  pass
+               if job > completed + cpu_num + 5:
+                  time.sleep(1)
+               else:
+                  if job >= completed:
+                    break
+
+      collect.append(snp_id)
+
+   if not numThreads or numThreads > 1:
+      for job in range(int(count/1000)-completed):
+         j,lines = q.get(True,15) # time out
+         if verbose:
+            sys.stderr.write("Job "+str(j)+" finished\n")
+         for line in lines:
+            out.write(line)
+
+   # print collect
+   ps = [item[1][3] for item in collect]
+   ts = [item[1][2] for item in collect]
+   print ps
+   return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index cd2445f1..00f48939 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -26,7 +26,6 @@ import multiprocessing as mp # Multiprocessing is part of the Python stdlib
 import Queue
 import time
 
-
 from optmatrix import matrix_initialize, matrixMultT
 
 def kinship_full(G):
@@ -87,12 +86,13 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
    m = G.shape[0] # snps
    snps = m
    sys.stderr.write(str(m)+" SNPs\n")
-   assert m>n, "n should be larger than m (snps>inds)"
+   assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
 
    q = mp.Queue()
    p = mp.Pool(numThreads, f_init, [q])
    cpu_num = mp.cpu_count()
    print "CPU cores:",cpu_num
+   print snps,computeSize
    iterations = snps/computeSize+1
    # if testing:
    #    iterations = 8
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 9fd05b42..8c6d3c3c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -51,6 +51,7 @@ from utility.benchmark import Bench
 from utility import temp_data
 from kinship import kinship, kinship_full, kvakve
 import genotype
+import gwas
 
 try:
     from wqflask.my_pylmm.pyLMM import chunks
@@ -253,7 +254,7 @@ def human_association(snp,
 #        refit=False,
 #        temp_data=None):
     
-def run_other(pheno_vector,
+def run_other_old(pheno_vector,
         genotype_matrix,
         restricted_max_likelihood=True,
         refit=False,
@@ -269,7 +270,7 @@ def run_other(pheno_vector,
     
     """
     
-    print("In run_other")
+    print("In run_other (old)")
     print("REML=",restricted_max_likelihood," REFIT=",refit)
     with Bench("Calculate Kinship"):
         kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
@@ -277,9 +278,51 @@ def run_other(pheno_vector,
     print("kinship_matrix: ", pf(kinship_matrix))
     print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
     
+    # with Bench("Create LMM object"):
+    #     lmm_ob = LMM(pheno_vector, kinship_matrix)
+    
+    # with Bench("LMM_ob fitting"):
+    #     lmm_ob.fit()
+
+    print("genotype_matrix: ", genotype_matrix.shape)
+    print(genotype_matrix)
+
+    with Bench("Doing GWAS"):
+        t_stats, p_values = GWAS(pheno_vector,
+                                      genotype_matrix,
+                                      kinship_matrix,
+                                      restricted_max_likelihood=True,
+                                      refit=False,
+                                      temp_data=tempdata)
+    Bench().report()
+    return p_values, t_stats
+
+def run_other_new(pheno_vector,
+        genotype_matrix,
+        restricted_max_likelihood=True,
+        refit=False,
+        tempdata=None      # <---- can not be None
+        ):
+    
+    """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
+    
+    restricted_max_likelihood -- whether to use restricted max likelihood; True or False
+    refit -- whether to refit the variance component for each marker
+    temp_data -- TempData object that stores the progress for each major step of the
+    calculations ("calculate_kinship" and "GWAS" take the majority of time) 
+    
+    """
+    
+    print("In run_other (new)")
+    print("REML=",restricted_max_likelihood," REFIT=",refit)
+    with Bench("Calculate Kinship"):
+        kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+    
+    print("kinship_matrix: ", pf(kinship_matrix))
+    print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
+
     with Bench("Create LMM object"):
         lmm_ob = LMM(pheno_vector, kinship_matrix)
-    
     with Bench("LMM_ob fitting"):
         lmm_ob.fit()
 
@@ -287,15 +330,15 @@ def run_other(pheno_vector,
     print(genotype_matrix)
 
     with Bench("Doing GWAS"):
-        t_stats, p_values = GWAS(pheno_vector,
-                                genotype_matrix,
-                                kinship_matrix,
-                                restricted_max_likelihood=True,
-                                refit=False,
-                                temp_data=tempdata)
+        t_stats, p_values = gwas.gwas(pheno_vector,
+                                      genotype_matrix.T,
+                                      kinship_matrix,
+                                      restricted_max_likelihood=True,
+                                      refit=False,verbose=True)
     Bench().report()
     return p_values, t_stats
 
+run_other = run_other_old
 
 def matrixMult(A,B):
 
@@ -821,4 +864,3 @@ if __name__ == '__main__':
         print("Run from runlmm.py instead")
 
 
-
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index cef0cdc4..f17f1bd1 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
 import sys
 import tsvreader
 import numpy as np
-from lmm import gn2_load_redis, calculate_kinship
+from lmm import gn2_load_redis, calculate_kinship_old
 from kinship import kinship, kinship_full
 import genotype
 import phenotype
@@ -151,17 +151,17 @@ elif cmd == 'kinship':
     gnt = None
 
     if options.test_kinship:
-        K = kinship_full(G)
+        K = kinship_full(np.copy(G))
         print "Genotype",G.shape, "\n", G
         print "first Kinship method",K.shape,"\n",K
         k1 = round(K[0][0],4)
-        K2 = calculate_kinship(np.copy(G.T),temp_data=None)
+        K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
         print "Genotype",G.shape, "\n", G
         print "GN2 Kinship method",K2.shape,"\n",K2
         k2 = round(K2[0][0],4)
     
     print "Genotype",G.shape, "\n", G
-    K3 = kinship(np.copy(G),options)
+    K3 = kinship(G.T)
     print "third Kinship method",K3.shape,"\n",K3
     sys.stderr.write(options.geno+"\n")
     k3 = round(K3[0][0],4)
-- 
cgit 1.4.1


From cc419336f559a51ed03a669d19042e6fd21355ed Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 16 Mar 2015 17:34:40 +0300
Subject: Adding GWAS code

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py      |  99 ++-----
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py       |  44 ++-
 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py      | 405 ++++++++++++++++++++++++++++
 wqflask/wqflask/my_pylmm/pyLMM/phenotype.py |   2 +-
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py    |  26 +-
 5 files changed, 490 insertions(+), 86 deletions(-)
 create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index 52455014..b9d5db61 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -21,79 +21,30 @@ import pdb
 import time
 import sys
 # from utility import temp_data
-import lmm
-
+import lmm2
 
 import os
 import numpy as np
 import input
 from optmatrix import matrix_initialize
-# from lmm import LMM
+from lmm2 import LMM2
 
 import multiprocessing as mp # Multiprocessing is part of the Python stdlib
 import Queue 
 
-def compute_snp(j,snp_ids,q = None):
-   # print(j,len(snp_ids),"\n")
+def formatResult(id,beta,betaSD,ts,ps):
+   return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
+
+def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
+   # print(j,snp_ids,"\n")
    result = []
    for snp_id in snp_ids:
-      # j,snp_id = collect
       snp,id = snp_id
-      # id = collect[1]
-      # result = []
-      # Check SNPs for missing values
-      x = snp[keep].reshape((n,1))  # all the SNPs
-      v = np.isnan(x).reshape((-1,))
-      if v.sum():
-         # NOTE: this code appears to be unreachable!
-         if verbose:
-            sys.stderr.write("Found missing values in "+str(x))
-         keeps = True - v
-         xs = x[keeps,:]
-         if keeps.sum() <= 1 or xs.var() <= 1e-6: 
-            # PS.append(np.nan)
-            # TS.append(np.nan)
-            # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
-            # continue
-            result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
-            continue
-
-         # Its ok to center the genotype -  I used normalizeGenotype to 
-         # force the removal of missing genotypes as opposed to replacing them with MAF.
-         if not normalizeGenotype:
-            xs = (xs - xs.mean()) / np.sqrt(xs.var())
-         Ys = Y[keeps]
-         X0s = X0[keeps,:]
-         Ks = K[keeps,:][:,keeps]
-         if kfile2:
-            K2s = K2[keeps,:][:,keeps]
-            Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=verbose,K2=K2s)
-         else:
-            Ls = LMM(Ys,Ks,X0=X0s,verbose=verbose)
-         if refit:
-           Ls.fit(X=xs,REML=REML)
-         else:
-            #try:
-            Ls.fit(REML=REML)
-            #except: pdb.set_trace()
-         ts,ps,beta,betaVar = Ls.association(xs,REML=REML,returnBeta=True)
-      else:
-         if x.var() == 0:
-            # Note: this code appears to be unreachable!
-
-            # PS.append(np.nan)
-            # TS.append(np.nan)
-            # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
-            result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
-            continue
-
-         if refit:
-            L.fit(X=x,REML=REML)
-         # This is where it happens
-         ts,ps,beta,betaVar = L.association(x,REML=REML,returnBeta=True)
+      x = snp.reshape((n,1))  # all the SNPs
+      # if refit:
+      #    L.fit(X=snp,REML=REML)
+      ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
       result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps))
-      # compute_snp.q.put([j,formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)])
-   # print [j,result[0]]," in result queue\n"
    if not q:
       q = compute_snp.q
    q.put([j,result])
@@ -113,8 +64,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    """
    matrix_initialize()
    cpu_num = mp.cpu_count()
-   numThreads = 1
+   numThreads = None
    kfile2 = False
+   reml = restricted_max_likelihood
 
    sys.stderr.write(str(G.shape)+"\n")
    n = G.shape[1] # inds
@@ -123,17 +75,17 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    snps = m
    sys.stderr.write(str(m)+" SNPs\n")
    # print "***** GWAS: G",G.shape,G
-   assert m>n, "n should be larger than m (snps>inds)"
+   assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
 
    # CREATE LMM object for association
    # if not kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
    # else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=verbose,K2=K2)
 
-   runlmm = lmm.LMM(Y,K) # ,Kva,Kve,X0,verbose=verbose)
+   lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
    if not refit:
       if verbose: sys.stderr.write("Computing fit for null model\n")
-      runlmm.fit()  # follow GN model in run_other
-      if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (runlmm.optH,runlmm.optSigma))
+      lmm2.fit()  # follow GN model in run_other
+      if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma))
       
    # outFile = "test.out"
    # out = open(outFile,'w')
@@ -142,8 +94,6 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    def outputResult(id,beta,betaSD,ts,ps):
       out.write(formatResult(id,beta,betaSD,ts,ps))
    def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-   def formatResult(id,beta,betaSD,ts,ps):
-      return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
 
    printOutHead()
 
@@ -162,15 +112,15 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    last_j = 0
    # for snp_id in G:
    for snp in G:
-      print snp.shape,snp
-      snp_id = ('SNPID',snp)
+      snp_id = (snp,'SNPID')
       count += 1
       if count % 1000 == 0:
          job = count/1000
          if verbose:
             sys.stderr.write("Job %d At SNP %d\n" % (job,count))
          if numThreads == 1:
-            compute_snp(job,collect,q)
+            print "Running on 1 THREAD"
+            compute_snp(job,n,collect,lmm2,reml,q)
             collect = []
             j,lines = q.get()
             if verbose:
@@ -178,7 +128,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             for line in lines:
                out.write(line)
          else:
-            p.apply_async(compute_snp,(job,collect))
+            p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
             collect = []
             while job > completed:
                try:
@@ -205,6 +155,13 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             sys.stderr.write("Job "+str(j)+" finished\n")
          for line in lines:
             out.write(line)
+   else:
+      print "Running on 1 THREAD"
+      # print collect
+      compute_snp(count/1000,n,collect,lmm2,reml,q)
+      j,lines = q.get()
+      for line in lines:
+         out.write(line)
 
    # print collect
    ps = [item[1][3] for item in collect]
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8c6d3c3c..c42f9fb7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -51,6 +51,7 @@ from utility.benchmark import Bench
 from utility import temp_data
 from kinship import kinship, kinship_full, kvakve
 import genotype
+import phenotype
 import gwas
 
 try:
@@ -315,32 +316,45 @@ def run_other_new(pheno_vector,
     
     print("In run_other (new)")
     print("REML=",restricted_max_likelihood," REFIT=",refit)
+
+    # Adjust phenotypes
+    Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
+    print("Removed missing phenotypes",Y.shape)
+    # if options.maf_normalization:
+    #     G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
+    #     print "MAF replacements: \n",G
+    # if not options.skip_genotype_normalization:
+    # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+
     with Bench("Calculate Kinship"):
-        kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+        K,G = calculate_kinship(G, tempdata)
     
-    print("kinship_matrix: ", pf(kinship_matrix))
-    print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
+    print("kinship_matrix: ", pf(K))
+    print("kinship_matrix.shape: ", pf(K.shape))
 
-    with Bench("Create LMM object"):
-        lmm_ob = LMM(pheno_vector, kinship_matrix)
-    with Bench("LMM_ob fitting"):
-        lmm_ob.fit()
+    # with Bench("Create LMM object"):
+    #     lmm_ob = lmm2.LMM2(Y,K)
+    # with Bench("LMM_ob fitting"):
+    #     lmm_ob.fit()
 
-    print("genotype_matrix: ", genotype_matrix.shape)
-    print(genotype_matrix)
+    print("genotype_matrix: ", G.shape)
+    print(G)
 
     with Bench("Doing GWAS"):
-        t_stats, p_values = gwas.gwas(pheno_vector,
-                                      genotype_matrix.T,
-                                      kinship_matrix,
+        t_stats, p_values = gwas.gwas(Y,
+                                      G.T,
+                                      K,
                                       restricted_max_likelihood=True,
                                       refit=False,verbose=True)
     Bench().report()
     return p_values, t_stats
 
-run_other = run_other_old
+run_other = run_other_new
 
 def matrixMult(A,B):
+    return np.dot(A,B)
+
+def matrixMult_old(A,B):
 
     # If there is no fblas then we will revert to np.dot()
 
@@ -674,11 +688,15 @@ class LMM:
            optimum.
   
         """
+        print("H=",H)
+        print("X=",X)
+        print("REML=",REML)
         n = len(self.LLs)
         HOpt = []
         for i in range(1,n-2):
             if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: 
                 HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1])))
+                print("HOpt=",HOpt)
                 if np.isnan(HOpt[-1][0]):
                     HOpt[-1][0] = [self.LLs[i-1]]
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
new file mode 100644
index 00000000..cba47a9b
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -0,0 +1,405 @@
+# pylmm is a python-based linear mixed-model solver with applications to GWAS
+
+# Copyright (C) 2013,2014  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import time
+import numpy as np
+from scipy.linalg import eigh, inv, det
+import scipy.stats as stats # t-tests
+from scipy import optimize
+from optmatrix import matrixMult
+
+def calculateKinship(W,center=False):
+      """
+	 W is an n x m matrix encoding SNP minor alleles.
+
+	 This function takes a matrix oF SNPs, imputes missing values with the maf,
+	 normalizes the resulting vectors and returns the RRM matrix.
+      """
+      n = W.shape[0]
+      m = W.shape[1]
+      keep = []
+      for i in range(m):
+	 mn = W[True - np.isnan(W[:,i]),i].mean()
+	 W[np.isnan(W[:,i]),i] = mn
+	 vr = W[:,i].var()
+	 if vr == 0: continue
+
+	 keep.append(i)
+	 W[:,i] = (W[:,i] - mn) / np.sqrt(vr)
+
+      W = W[:,keep]
+      K = matrixMult(W,W.T) * 1.0/float(m)
+      if center:
+	 P = np.diag(np.repeat(1,n)) - 1/float(n) * np.ones((n,n))
+	 S = np.trace(matrixMult(matrixMult(P,K),P))
+	 K_n = (n - 1)*K / S
+	 return K_n
+      return K
+
+def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
+      """
+
+        Performs a basic GWAS scan using the LMM.  This function
+        uses the LMM module to assess association at each SNP and 
+        does some simple cleanup, such as removing missing individuals 
+        per SNP and re-computing the eigen-decomp
+
+	Y - n x 1 phenotype vector 
+        X - n x m SNP matrix (genotype matrix)
+	K - n x n kinship matrix
+        Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K
+        X0 - n x q covariate matrix
+	REML - use restricted maximum likelihood 
+        refit - refit the variance component for each SNP
+
+      """
+      n = X.shape[0]
+      m = X.shape[1]
+      prins("Initialize GWAS")
+      print("genotype matrix n is:", n)
+      print("genotype matrix m is:", m)
+
+      if X0 == None: 
+         X0 = np.ones((n,1))
+      
+      # Remove missing values in Y and adjust associated parameters
+      v = np.isnan(Y)
+      if v.sum():
+	 keep = True - v
+	 keep = keep.reshape((-1,))
+	 Y = Y[keep]
+	 X = X[keep,:]
+	 X0 = X0[keep,:]
+	 K = K[keep,:][:,keep]
+	 Kva = []
+	 Kve = []
+
+      if len(Y) == 0: 
+         return np.ones(m)*np.nan,np.ones(m)*np.nan
+
+      L = LMM(Y,K,Kva,Kve,X0)
+      if not refit: L.fit()
+
+      PS = []
+      TS = []
+
+      n = X.shape[0]
+      m = X.shape[1]
+
+      for i in range(m):
+	 x = X[:,i].reshape((n,1))
+	 v = np.isnan(x).reshape((-1,))
+	 if v.sum():
+	    keep = True - v
+	    xs = x[keep,:]
+	    if xs.var() == 0: 
+	       PS.append(np.nan) 
+	       TS.append(np.nan) 
+	       continue
+
+	    Ys = Y[keep]
+	    X0s = X0[keep,:]
+	    Ks = K[keep,:][:,keep]
+	    Ls = LMM(Ys,Ks,X0=X0s)
+	    if refit: 
+               Ls.fit(X=xs)
+	    else: 
+               Ls.fit()
+	    ts,ps = Ls.association(xs,REML=REML)
+	 else: 
+	    if x.var() == 0: 
+	       PS.append(np.nan) 
+	       TS.append(np.nan) 
+	       continue
+
+	    if refit: 
+               L.fit(X=x)
+	    ts,ps = L.association(x,REML=REML)
+	    
+	 PS.append(ps)
+	 TS.append(ts)
+
+      return TS,PS
+
+class LMM2:
+
+   """
+	 This is a simple version of EMMA/fastLMM.  
+	 The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K)
+	 and to optimize this model by finding the maximum-likelihood estimates for the model parameters.
+	 There are three model parameters: heritability (h), covariate coefficients (beta) and the total
+	 phenotypic variance (sigma).
+	 Heritability as defined here is the proportion of the total variance (sigma) that is attributed to 
+	 the kinship matrix.
+
+	 For simplicity, we assume that everything being input is a numpy array.
+	 If this is not the case, the module may throw an error as conversion from list to numpy array
+	 is not done consistently.
+
+   """
+   def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False):
+
+      """
+      The constructor takes a phenotype vector or array Y of size n.
+      It takes a kinship matrix K of size n x n.  Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
+      If they are not provided, the constructor will calculate them.
+      X0 is an optional covariate matrix of size n x q, where there are q covariates.
+      When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+      """
+
+      if X0 == None: 
+	 X0 = np.ones(len(Y)).reshape(len(Y),1)
+      self.verbose = verbose
+
+      x = True - np.isnan(Y)
+      x = x.reshape(-1,)
+      if not x.sum() == len(Y):
+	 if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum()))
+	 Y = Y[x]
+	 K = K[x,:][:,x]
+	 X0 = X0[x,:]
+	 Kva = []
+	 Kve = []
+      self.nonmissing = x
+
+      if len(Kva) == 0 or len(Kve) == 0:
+	 if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
+	 begin = time.time()
+	 Kva,Kve = eigh(K)
+	 end = time.time()
+	 if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+      
+      self.K = K
+      self.Kva = Kva
+      self.Kve = Kve
+      self.N = self.K.shape[0]
+      self.Y = Y.reshape((self.N,1))
+      self.X0 = X0
+
+      if sum(self.Kva < 1e-6):
+         if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0)))
+         self.Kva[self.Kva < 1e-6] = 1e-6
+
+      self.transform()
+
+   def transform(self):
+
+      """
+	 Computes a transformation on the phenotype vector and the covariate matrix.
+	 The transformation is obtained by left multiplying each parameter by the transpose of the 
+	 eigenvector matrix of K (the kinship).
+      """
+
+      self.Yt = matrixMult(self.Kve.T, self.Y)
+      self.X0t = matrixMult(self.Kve.T, self.X0)
+      self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))])
+      self.q = self.X0t.shape[1]
+
+   def getMLSoln(self,h,X):
+
+      """
+	 Obtains the maximum-likelihood estimates for the covariate coefficients (beta),
+	 the total variance of the trait (sigma) and also passes intermediates that can 
+	 be utilized in other functions. The input parameter h is a value between 0 and 1 and represents
+	 the heritability or the proportion of the total variance attributed to genetics.  The X is the 
+	 covariate matrix.
+      """
+   
+      S = 1.0/(h*self.Kva + (1.0 - h))
+      Xt = X.T*S
+      XX = matrixMult(Xt,X)
+      XX_i = inv(XX)
+      beta =  matrixMult(matrixMult(XX_i,Xt),self.Yt)
+      Yt = self.Yt - matrixMult(X,beta)
+      Q = np.dot(Yt.T*S,Yt)
+      sigma = Q * 1.0 / (float(self.N) - float(X.shape[1]))
+      return beta,sigma,Q,XX_i,XX
+
+   def LL_brent(self,h,X=None,REML=False): 
+      #brent will not be bounded by the specified bracket.
+      # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search.
+      if h < 0: return 1e6
+      return -self.LL(h,X,stack=False,REML=REML)[0]
+	 
+   def LL(self,h,X=None,stack=True,REML=False):
+
+      """
+	 Computes the log-likelihood for a given heritability (h).  If X==None, then the 
+	 default X0t will be used.  If X is set and stack=True, then X0t will be matrix concatenated with
+	 the input X.  If stack is false, then X is used in place of X0t in the LL calculation.
+	 REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
+      """
+
+      if X == None: X = self.X0t
+      elif stack: 
+	 self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
+	 X = self.X0t_stack
+
+      n = float(self.N)
+      q = float(X.shape[1])
+      beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X)
+      LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q)
+      LL = -0.5 * LL
+
+      if REML:
+	 LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(det(matrixMult(X.T,X))) - np.log(det(XX))
+	 LL = LL + 0.5*LL_REML_part
+
+
+      LL = LL.sum()
+      return LL,beta,sigma,XX_i
+
+   def getMax(self,H, X=None,REML=False):
+
+      """
+	 Helper functions for .fit(...).  
+	 This function takes a set of LLs computed over a grid and finds possible regions 
+	 containing a maximum.  Within these regions, a Brent search is performed to find the 
+	 optimum.
+
+      """
+      n = len(self.LLs)
+      HOpt = []
+      for i in range(1,n-2):
+          if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]:
+	    HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1])))
+	    if np.isnan(HOpt[-1]): HOpt[-1] = H[i-1]
+	    #if np.isnan(HOpt[-1]): HOpt[-1] = self.LLs[i-1]
+	    #if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]]
+
+      if len(HOpt) > 1: 
+	 if self.verbose: sys.stderr.write("NOTE: Found multiple optima.  Returning first...\n")
+	 return HOpt[0]
+      elif len(HOpt) == 1: return HOpt[0]
+      elif self.LLs[0] > self.LLs[n-1]: return H[0]
+      else: return H[n-1]
+
+
+   def fit(self,X=None,ngrids=100,REML=True):
+
+      """
+	 Finds the maximum-likelihood solution for the heritability (h) given the current parameters.
+	 X can be passed and will transformed and concatenated to X0t.  Otherwise, X0t is used as 
+	 the covariate matrix.
+
+	 This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum.
+	 Given this optimum, the function computes the LL and associated ML solutions.
+      """
+      
+      if X == None: X = self.X0t
+      else: 
+	 #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
+	 self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
+	 X = self.X0t_stack
+
+      H = np.array(range(ngrids)) / float(ngrids)
+      L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H])
+      self.LLs = L
+
+      hmax = self.getMax(H,X,REML)
+      L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML)
+      
+      self.H = H
+      self.optH = hmax.sum()
+      self.optLL = L
+      self.optBeta = beta
+      self.optSigma = sigma.sum()
+
+      return hmax,beta,sigma,L
+
+   def association(self,X,h=None,stack=True,REML=True,returnBeta=False):
+      """
+	Calculates association statitics for the SNPs encoded in the vector X of size n.
+	If h == None, the optimal h stored in optH is used.
+
+      """
+      if False:
+            print "X=",X
+            print "h=",h
+            print "q=",self.q
+            print "self.Kve=",self.Kve
+            print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack
+      
+      if stack:
+	 # X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
+         m = matrixMult(self.Kve.T,X)
+         # print "m=",m
+         m = m[:,0]
+         self.X0t_stack[:,(self.q)] = m
+	 X = self.X0t_stack
+	 
+      if h == None: h = self.optH
+
+      L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
+      q  = len(beta)
+      ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q)
+      
+      if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma
+      return ts,ps
+
+   def tstat(self,beta,var,sigma,q,log=False): 
+
+	 """
+	    Calculates a t-statistic and associated p-value given the estimate of beta and its standard error.
+	    This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test.
+	 """
+
+	 ts = beta / np.sqrt(var * sigma)	 
+	 #ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q))
+	 # sf == survival function - this is more accurate -- could also use logsf if the precision is not good enough
+	 if log:
+	    ps = 2.0 + (stats.t.logsf(np.abs(ts), self.N-q))
+	 else:
+	    ps = 2.0*(stats.t.sf(np.abs(ts), self.N-q))
+	 if not len(ts) == 1 or not len(ps) == 1: 
+	    raise Exception("Something bad happened :(")
+	 return ts.sum(),ps.sum()
+
+   def plotFit(self,color='b-',title=''):
+
+      """
+	 Simple function to visualize the likelihood space.  It takes the LLs 
+	 calcualted over a grid and normalizes them by subtracting off the mean and exponentiating.
+	 The resulting "probabilities" are normalized to one and plotted against heritability.
+	 This can be seen as an approximation to the posterior distribuiton of heritability.
+
+	 For diagnostic purposes this lets you see if there is one distinct maximum or multiple 
+	 and what the variance of the parameter looks like.
+      """
+      import matplotlib.pyplot as pl
+
+      mx = self.LLs.max()
+      p = np.exp(self.LLs - mx)
+      p = p/p.sum()
+
+      pl.plot(self.H,p,color)
+      pl.xlabel("Heritability")
+      pl.ylabel("Probability of data")
+      pl.title(title)
+   
+   def meanAndVar(self):
+
+      mx = self.LLs.max()
+      p = np.exp(self.LLs - mx)
+      p = p/p.sum()
+
+      mn = (self.H * p).sum()
+      vx = ((self.H - mn)**2 * p).sum()
+
+      return mn,vx
+
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index bb620052..682ba371 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -36,5 +36,5 @@ def remove_missing(y,g,verbose=False):
             sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
         y1 = y[keep]
         g1 = g[keep,:]
-    return y1,g1
+    return y1,g1,keep
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index f17f1bd1..4268f3be 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -99,13 +99,37 @@ if options.geno:
     g = tsvreader.geno(options.geno)
     print g.shape
 
+if cmd == 'redis_new':
+    # Emulating the redis setup of GN2
+    Y = y
+    G = g
+    print "Original G",G.shape, "\n", G
+
+    gt = G.T
+    G = None
+    ps, ts = gn2_load_redis('testrun','other',k,Y,gt)
+    print np.array(ps)
+    print sum(ps)
+    # Test results
+    p1 = round(ps[0],4)
+    p2 = round(ps[-1],4)
+    sys.stderr.write(options.geno+"\n")
+    if options.geno == 'data/small.geno':
+        assert p1==0.0708, "p1=%f" % p1
+        assert p2==0.1417, "p2=%f" % p2
+    if options.geno == 'data/small_na.geno':
+        assert p1==0.0958, "p1=%f" % p1
+        assert p2==0.0435, "p2=%f" % p2
+    if options.geno == 'data/test8000.geno':
+        assert p1==0.8984, "p1=%f" % p1
+        assert p2==0.9623, "p2=%f" % p2
 if cmd == 'redis':
     # Emulating the redis setup of GN2
     G = g
     print "Original G",G.shape, "\n", G
     if y != None:
         gnt = np.array(g).T
-        Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+        Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
         G = g.T
         print "Removed missing phenotypes",G.shape, "\n", G
     if options.maf_normalization:
-- 
cgit 1.4.1


From 886934b9cf18cf092c5d6cd0667d860aa30e64b8 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 16 Mar 2015 18:08:00 +0300
Subject: GWAS: results get returned

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 56 +++++++++++++++++++---------------
 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 10 +++---
 2 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index b9d5db61..f8d77ab6 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -36,15 +36,17 @@ def formatResult(id,beta,betaSD,ts,ps):
    return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
 
 def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
-   # print(j,snp_ids,"\n")
+   # print("COMPUTE SNP",j,snp_ids,"\n")
    result = []
    for snp_id in snp_ids:
       snp,id = snp_id
       x = snp.reshape((n,1))  # all the SNPs
+      # print "X=",x
       # if refit:
       #    L.fit(X=snp,REML=REML)
       ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
-      result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps))
+      # result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps))
+      result.append( (ts,ps) )
    if not q:
       q = compute_snp.q
    q.put([j,result])
@@ -95,7 +97,8 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
       out.write(formatResult(id,beta,betaSD,ts,ps))
    def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
 
-   printOutHead()
+   # printOutHead()
+   res = []
 
    # Set up the pool
    # mp.set_start_method('spawn')
@@ -114,6 +117,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    for snp in G:
       snp_id = (snp,'SNPID')
       count += 1
+      # print count,snp_id
       if count % 1000 == 0:
          job = count/1000
          if verbose:
@@ -122,21 +126,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             print "Running on 1 THREAD"
             compute_snp(job,n,collect,lmm2,reml,q)
             collect = []
-            j,lines = q.get()
+            j,lst = q.get()
             if verbose:
                sys.stderr.write("Job "+str(j)+" finished\n")
-            for line in lines:
-               out.write(line)
+            # for line in lines:
+            #    out.write(line)
+            res.append(lst)
          else:
             p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
             collect = []
             while job > completed:
                try:
-                  j,lines = q.get_nowait()
+                  j,lst = q.get_nowait()
                   if verbose:
                      sys.stderr.write("Job "+str(j)+" finished\n")
-                  for line in lines:
-                     out.write(line)
+                  # for line in lines:
+                  #    out.write(line)
+                  res.append(lst)
                   completed += 1
                except Queue.Empty:
                   pass
@@ -148,23 +154,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
 
       collect.append(snp_id)
 
-   if not numThreads or numThreads > 1:
+   if numThreads==1:
+      print "Running on 1 THREAD"
+      compute_snp(count/1000,n,collect,lmm2,reml,q)
+      j,lst = q.get()
+      # for line in lines:
+      #    out.write(line)
+      res.append(lst)
+   else:
       for job in range(int(count/1000)-completed):
-         j,lines = q.get(True,15) # time out
+         j,lst = q.get(True,15) # time out
          if verbose:
             sys.stderr.write("Job "+str(j)+" finished\n")
-         for line in lines:
-            out.write(line)
-   else:
-      print "Running on 1 THREAD"
-      # print collect
-      compute_snp(count/1000,n,collect,lmm2,reml,q)
-      j,lines = q.get()
-      for line in lines:
-         out.write(line)
-
-   # print collect
-   ps = [item[1][3] for item in collect]
-   ts = [item[1][2] for item in collect]
-   print ps
+         res.append(lst)
+
+   # print res
+   ts = [item[0] for res1 in res for item in res1]
+   ps = [item[1] for res1 in res for item in res1]
+   # ts = [item[1] for item in res]
+   # print ps
    return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index cba47a9b..d50b0111 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -329,11 +329,11 @@ class LMM2:
 
       """
       if False:
-            print "X=",X
-            print "h=",h
-            print "q=",self.q
-            print "self.Kve=",self.Kve
-            print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack
+         print "X=",X
+         print "h=",h
+         print "q=",self.q
+         print "self.Kve=",self.Kve
+         print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack
       
       if stack:
 	 # X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
-- 
cgit 1.4.1


From 7268ed46c9bce6fea58b55723c325426bdeeb617 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 10:08:41 +0300
Subject: GWAS multi-core: Tweak queue

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index f8d77ab6..2a472717 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -146,8 +146,8 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
                   completed += 1
                except Queue.Empty:
                   pass
-               if job > completed + cpu_num + 5:
-                  time.sleep(1)
+               if job > completed + cpu_num*2:
+                  time.sleep(0.1)
                else:
                   if job >= completed:
                     break
-- 
cgit 1.4.1


From 7c958796142e9fe0cc3155d10581d68ae2bbc5df Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 10:39:58 +0300
Subject: lmm.py: bring to original state

---
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index c42f9fb7..994b6be7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -688,15 +688,11 @@ class LMM:
            optimum.
   
         """
-        print("H=",H)
-        print("X=",X)
-        print("REML=",REML)
         n = len(self.LLs)
         HOpt = []
         for i in range(1,n-2):
             if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: 
                 HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1])))
-                print("HOpt=",HOpt)
                 if np.isnan(HOpt[-1][0]):
                     HOpt[-1][0] = [self.LLs[i-1]]
 
-- 
cgit 1.4.1


From 46170b2741d006db89ce69128feff51c6e0e7752 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 11:05:30 +0300
Subject: lmm1: fixed a few regressions introduced while debugging

---
 wqflask/wqflask/my_pylmm/pyLMM/kinship.py |  2 +-
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py     | 42 ++++++++++++++++++++-----------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py  |  4 +--
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 00f48939..0c43587e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -168,7 +168,7 @@ def kvakve(K, verbose=True):
       print("Kve is: ", Kve.shape, Kve)
 
    if sum(Kva < 1e-6):
-      if verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(Kva < 1e-6)))
+      if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0)))
       Kva[Kva < 1e-6] = 1e-6
    return Kva,Kve
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 994b6be7..2014ffb8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -271,7 +271,7 @@ def run_other_old(pheno_vector,
     
     """
     
-    print("In run_other (old)")
+    print("Running the original LMM engine in run_other (old)")
     print("REML=",restricted_max_likelihood," REFIT=",refit)
     with Bench("Calculate Kinship"):
         kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
@@ -314,7 +314,7 @@ def run_other_new(pheno_vector,
     
     """
     
-    print("In run_other (new)")
+    print("Running the new LMM2 engine in run_other_new")
     print("REML=",restricted_max_likelihood," REFIT=",refit)
 
     # Adjust phenotypes
@@ -349,12 +349,10 @@ def run_other_new(pheno_vector,
     Bench().report()
     return p_values, t_stats
 
-run_other = run_other_new
+# def matrixMult(A,B):
+#     return np.dot(A,B)
 
 def matrixMult(A,B):
-    return np.dot(A,B)
-
-def matrixMult_old(A,B):
 
     # If there is no fblas then we will revert to np.dot()
 
@@ -772,7 +770,10 @@ class LMM:
  
           ts = beta / np.sqrt(var * sigma)        
           ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q))
-          if not len(ts) == 1 or not len(ps) == 1: raise Exception("Something bad happened :(")
+          if not len(ts) == 1 or not len(ps) == 1:
+              print("ts=",ts)
+              print("ps=",ps)
+              raise Exception("Something bad happened :(")
           return ts.sum(),ps.sum()
 
     def plotFit(self,color='b-',title=''):
@@ -798,7 +799,11 @@ class LMM:
        pl.title(title)
 
 
-def gn2_redis(key,species):
+def gn2_redis(key,species,new_code=True):
+    """
+    Invoke pylmm using Redis as a container. new_code runs the new
+    version
+    """
     json_params = Redis.get(key)
     
     params = json.loads(json_params)
@@ -818,11 +823,18 @@ def gn2_redis(key,species):
                   refit = params['refit'],
                   tempdata = tempdata)
     else:
-        ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']),
-                  genotype_matrix = geno,
-                  restricted_max_likelihood = params['restricted_max_likelihood'],
-                  refit = params['refit'],
-                  tempdata = tempdata)
+        if new_code:
+            ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']),
+                               genotype_matrix = geno,
+                               restricted_max_likelihood = params['restricted_max_likelihood'],
+                               refit = params['refit'],
+                               tempdata = tempdata)
+        else:
+            ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']),
+                               genotype_matrix = geno,
+                               restricted_max_likelihood = params['restricted_max_likelihood'],
+                               refit = params['refit'],
+                               tempdata = tempdata)
         
     results_key = "pylmm:results:" + params['temp_uuid']
 
@@ -847,7 +859,7 @@ def gn2_main():
 
     gn2_redis(key,species)
 
-def gn2_load_redis(key,species,kinship,pheno,geno):
+def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
     print("Loading Redis from parsed data")
     if kinship == None:
         k = None
@@ -868,7 +880,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno):
     Redis.set(key, json_params)
     Redis.expire(key, 60*60)
 
-    return gn2_redis(key,species)
+    return gn2_redis(key,species,new_code)
     
 if __name__ == '__main__':
     print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 4268f3be..1af5a443 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -107,7 +107,7 @@ if cmd == 'redis_new':
 
     gt = G.T
     G = None
-    ps, ts = gn2_load_redis('testrun','other',k,Y,gt)
+    ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
     print np.array(ps)
     print sum(ps)
     # Test results
@@ -142,7 +142,7 @@ if cmd == 'redis':
 
     gt = G.T
     G = None
-    ps, ts = gn2_load_redis('testrun','other',k,Y,gt)
+    ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
     print np.array(ps)
     print sum(ps)
     # Test results
-- 
cgit 1.4.1


From 73ed2f67bd0478473b887902ae96caaa0fca58d4 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 13:10:49 +0300
Subject: GWAS: one result is missing

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py   | 33 +++++++++++++-----------------
 wqflask/wqflask/my_pylmm/pyLMM/input.py  |  2 ++
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py    | 10 ++++++---
 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py   | 17 ++++++++++------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 35 +++++++++++++++++---------------
 5 files changed, 53 insertions(+), 44 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index 2a472717..20083bde 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -66,7 +66,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    """
    matrix_initialize()
    cpu_num = mp.cpu_count()
-   numThreads = None
+   numThreads = None # for now use all available threads
    kfile2 = False
    reml = restricted_max_likelihood
 
@@ -110,10 +110,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    # PS = []
    # TS = []
    count = 0
-
-   completed = 0
-   last_j = 0
-   # for snp_id in G:
+   jobs_running = 0
    for snp in G:
       snp_id = (snp,'SNPID')
       count += 1
@@ -129,13 +126,12 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             j,lst = q.get()
             if verbose:
                sys.stderr.write("Job "+str(j)+" finished\n")
-            # for line in lines:
-            #    out.write(line)
             res.append(lst)
          else:
             p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
+            jobs_running += 1
             collect = []
-            while job > completed:
+            while jobs_running:
                try:
                   j,lst = q.get_nowait()
                   if verbose:
@@ -143,34 +139,33 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
                   # for line in lines:
                   #    out.write(line)
                   res.append(lst)
-                  completed += 1
+                  jobs_running -= 1
                except Queue.Empty:
                   pass
-               if job > completed + cpu_num*2:
-                  time.sleep(0.1)
+               if jobs_running + cpu_num*2 > 0:
+                  time.sleep(1.0)
                else:
-                  if job >= completed:
+                  if jobs_running > 0:
                     break
 
       collect.append(snp_id)
 
-   if numThreads==1:
+   if numThreads==1 or count<1000:
       print "Running on 1 THREAD"
       compute_snp(count/1000,n,collect,lmm2,reml,q)
       j,lst = q.get()
-      # for line in lines:
-      #    out.write(line)
       res.append(lst)
    else:
-      for job in range(int(count/1000)-completed):
+      print "count=",count," running=",jobs_running," collect=",len(collect)
+      for job in range(jobs_running):
          j,lst = q.get(True,15) # time out
          if verbose:
             sys.stderr.write("Job "+str(j)+" finished\n")
          res.append(lst)
 
-   # print res
+   if verbose:
+      print "res=",res[0][0:10]
+   print [len(res1) for res1 in res]
    ts = [item[0] for res1 in res for item in res1]
    ps = [item[1] for res1 in res for item in res1]
-   # ts = [item[1] for item in res]
-   # print ps
    return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index f7b556a5..7063fedf 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -135,6 +135,8 @@ class plink:
     def normalizeGenotype(self,G):
         # print "Before",G
         # print G.shape
+        print "call input.normalizeGenotype"
+        raise "This should not be used"
         x = True - np.isnan(G)
         m = G[x].mean()
         s = np.sqrt(G[x].var())
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 2014ffb8..8a24d98b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -285,7 +285,7 @@ def run_other_old(pheno_vector,
     # with Bench("LMM_ob fitting"):
     #     lmm_ob.fit()
 
-    print("genotype_matrix: ", genotype_matrix.shape)
+    print("run_other_old genotype_matrix: ", genotype_matrix.shape)
     print(genotype_matrix)
 
     with Bench("Doing GWAS"):
@@ -320,6 +320,7 @@ def run_other_new(pheno_vector,
     # Adjust phenotypes
     Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
     print("Removed missing phenotypes",Y.shape)
+
     # if options.maf_normalization:
     #     G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
     #     print "MAF replacements: \n",G
@@ -337,7 +338,7 @@ def run_other_new(pheno_vector,
     # with Bench("LMM_ob fitting"):
     #     lmm_ob.fit()
 
-    print("genotype_matrix: ", G.shape)
+    print("run_other_new genotype_matrix: ", G.shape)
     print(G)
 
     with Bench("Doing GWAS"):
@@ -388,7 +389,9 @@ def calculate_kinship_new(genotype_matrix, temp_data=None):
     Call the new kinship calculation where genotype_matrix contains
     inds (columns) by snps (rows).
     """
+    print("call genotype.normalize")
     G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
+    print("call calculate_kinship_new")
     return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
 
 def calculate_kinship_old(genotype_matrix, temp_data=None):
@@ -399,6 +402,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
     normalizes the resulting vectors and returns the RRM matrix.
     
     """
+    print("call calculate_kinship_old")
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
     print("genotype 2D matrix n (inds) is:", n)
@@ -429,7 +433,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
             temp_data.store("percent_complete", percent_complete)
         
     genotype_matrix = genotype_matrix[:,keep]
-    print("genotype_matrix: ", pf(genotype_matrix))
+    print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
     kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
     return kinship_matrix,genotype_matrix
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d50b0111..d4b3ac82 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -22,6 +22,7 @@ from scipy.linalg import eigh, inv, det
 import scipy.stats as stats # t-tests
 from scipy import optimize
 from optmatrix import matrixMult
+import kinship
 
 def calculateKinship(W,center=False):
       """
@@ -177,13 +178,17 @@ class LMM2:
 	 Kve = []
       self.nonmissing = x
 
-      if len(Kva) == 0 or len(Kve) == 0:
-	 if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-	 begin = time.time()
-	 Kva,Kve = eigh(K)
-	 end = time.time()
-	 if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+      print("this K is:", K.shape, K)
       
+      if len(Kva) == 0 or len(Kve) == 0:
+          # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
+          begin = time.time()
+          # Kva,Kve = linalg.eigh(K)
+          Kva,Kve = kinship.kvakve(K)
+          end = time.time()
+          if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+          print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
+
       self.K = K
       self.Kva = Kva
       self.Kve = Kve
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 1af5a443..708c9185 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -54,9 +54,9 @@ parser.add_option("--geno",dest="geno",
 parser.add_option("--maf-normalization",
                   action="store_true", dest="maf_normalization", default=False,
                   help="Apply MAF genotype normalization")
-parser.add_option("--skip-genotype-normalization",
-                  action="store_true", dest="skip_genotype_normalization", default=False,
-                  help="Skip genotype normalization")
+parser.add_option("--genotype-normalization",
+                  action="store_true", dest="genotype_normalization", default=False,
+                  help="Force genotype normalization")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
@@ -100,7 +100,8 @@ if options.geno:
     print g.shape
 
 if cmd == 'redis_new':
-    # Emulating the redis setup of GN2
+    # The main difference between redis_new and redis is that missing
+    # phenotypes are handled by the first
     Y = y
     G = g
     print "Original G",G.shape, "\n", G
@@ -109,7 +110,7 @@ if cmd == 'redis_new':
     G = None
     ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
     print np.array(ps)
-    print sum(ps)
+    print len(ps),sum(ps)
     # Test results
     p1 = round(ps[0],4)
     p2 = round(ps[-1],4)
@@ -118,12 +119,14 @@ if cmd == 'redis_new':
         assert p1==0.0708, "p1=%f" % p1
         assert p2==0.1417, "p2=%f" % p2
     if options.geno == 'data/small_na.geno':
-        assert p1==0.0958, "p1=%f" % p1
-        assert p2==0.0435, "p2=%f" % p2
+        assert p1==0.0897, "p1=%f" % p1
+        assert p2==0.0405, "p2=%f" % p2
     if options.geno == 'data/test8000.geno':
         assert p1==0.8984, "p1=%f" % p1
-        assert p2==0.9623, "p2=%f" % p2
-if cmd == 'redis':
+        assert p2==0.9620, "p2=%f" % p2
+        assert sum(ps) == 4070.02346579
+        assert len(ps) == 8000
+elif cmd == 'redis':
     # Emulating the redis setup of GN2
     G = g
     print "Original G",G.shape, "\n", G
@@ -135,7 +138,7 @@ if cmd == 'redis':
     if options.maf_normalization:
         G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
         print "MAF replacements: \n",G
-    if not options.skip_genotype_normalization:
+    if options.genotype_normalization:
         G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
     g = None
     gnt = None
@@ -144,8 +147,8 @@ if cmd == 'redis':
     G = None
     ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
     print np.array(ps)
-    print sum(ps)
-    # Test results
+    print len(ps),sum(ps)
+    # Test results 4070.02346579
     p1 = round(ps[0],4)
     p2 = round(ps[-1],4)
     sys.stderr.write(options.geno+"\n")
@@ -153,11 +156,11 @@ if cmd == 'redis':
         assert p1==0.0708, "p1=%f" % p1
         assert p2==0.1417, "p2=%f" % p2
     if options.geno == 'data/small_na.geno':
-        assert p1==0.0958, "p1=%f" % p1
-        assert p2==0.0435, "p2=%f" % p2
+        assert p1==0.0897, "p1=%f" % p1
+        assert p2==0.0405, "p2=%f" % p2
     if options.geno == 'data/test8000.geno':
         assert p1==0.8984, "p1=%f" % p1
-        assert p2==0.9623, "p2=%f" % p2
+        assert p2==0.8827, "p2=%f" % p2
 elif cmd == 'kinship':
     G = g
     print "Original G",G.shape, "\n", G
@@ -169,7 +172,7 @@ elif cmd == 'kinship':
     if options.maf_normalization:
         G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
         print "MAF replacements: \n",G
-    if not options.skip_genotype_normalization:
+    if options.genotype_normalization:
         G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
     g = None
     gnt = None
-- 
cgit 1.4.1


From 5cee365cc2d8bf8cafca018b9a04e4821616899d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 13:27:38 +0300
Subject: GWAS: should be correct now

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py   | 26 ++++++++++++--------------
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py |  6 +++---
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index 20083bde..ce7842f7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -131,37 +131,35 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
             jobs_running += 1
             collect = []
-            while jobs_running:
+            while jobs_running > cpu_num:
                try:
                   j,lst = q.get_nowait()
                   if verbose:
                      sys.stderr.write("Job "+str(j)+" finished\n")
-                  # for line in lines:
-                  #    out.write(line)
                   res.append(lst)
                   jobs_running -= 1
                except Queue.Empty:
+                  time.sleep(0.1)
                   pass
-               if jobs_running + cpu_num*2 > 0:
+               if jobs_running > cpu_num*2:
                   time.sleep(1.0)
                else:
-                  if jobs_running > 0:
-                    break
+                  break
 
       collect.append(snp_id)
 
-   if numThreads==1 or count<1000:
+   if numThreads==1 or count<1000 or len(collect)>0:
       print "Running on 1 THREAD"
       compute_snp(count/1000,n,collect,lmm2,reml,q)
+      collect = []
       j,lst = q.get()
       res.append(lst)
-   else:
-      print "count=",count," running=",jobs_running," collect=",len(collect)
-      for job in range(jobs_running):
-         j,lst = q.get(True,15) # time out
-         if verbose:
-            sys.stderr.write("Job "+str(j)+" finished\n")
-         res.append(lst)
+   print "count=",count," running=",jobs_running," collect=",len(collect)
+   for job in range(jobs_running):
+      j,lst = q.get(True,15) # time out
+      if verbose:
+         sys.stderr.write("Job "+str(j)+" finished\n")
+      res.append(lst)
 
    if verbose:
       print "res=",res[0][0:10]
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 708c9185..e301ef1a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -122,9 +122,9 @@ if cmd == 'redis_new':
         assert p1==0.0897, "p1=%f" % p1
         assert p2==0.0405, "p2=%f" % p2
     if options.geno == 'data/test8000.geno':
-        assert p1==0.8984, "p1=%f" % p1
-        assert p2==0.9620, "p2=%f" % p2
-        assert sum(ps) == 4070.02346579
+        # assert p1==0.8984, "p1=%f" % p1
+        # assert p2==0.9621, "p2=%f" % p2
+        assert round(sum(ps)) == 4070
         assert len(ps) == 8000
 elif cmd == 'redis':
     # Emulating the redis setup of GN2
-- 
cgit 1.4.1


From 62f287b551a3b207eadffb52d6de1e0ef64a1a08 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 16:28:08 +0300
Subject: runlmm: Explicit handling of missing phenotypes

---
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index e301ef1a..324c4f2c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -57,6 +57,9 @@ parser.add_option("--maf-normalization",
 parser.add_option("--genotype-normalization",
                   action="store_true", dest="genotype_normalization", default=False,
                   help="Force genotype normalization")
+parser.add_option("--remove-missing-phenotypes",
+                  action="store_true", dest="remove_missing_phenotypes", default=False,
+                  help="Remove missing phenotypes")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
@@ -130,11 +133,13 @@ elif cmd == 'redis':
     # Emulating the redis setup of GN2
     G = g
     print "Original G",G.shape, "\n", G
-    if y != None:
+    if y != None and options.remove_missing_phenotypes:
         gnt = np.array(g).T
         Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
         G = g.T
         print "Removed missing phenotypes",G.shape, "\n", G
+    else:
+        Y = y
     if options.maf_normalization:
         G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
         print "MAF replacements: \n",G
@@ -159,12 +164,12 @@ elif cmd == 'redis':
         assert p1==0.0897, "p1=%f" % p1
         assert p2==0.0405, "p2=%f" % p2
     if options.geno == 'data/test8000.geno':
-        assert p1==0.8984, "p1=%f" % p1
-        assert p2==0.8827, "p2=%f" % p2
+        assert round(sum(ps)) == 4070
+        assert len(ps) == 8000
 elif cmd == 'kinship':
     G = g
     print "Original G",G.shape, "\n", G
-    if y != None:
+    if y != None and options.remove_missing_phenotypes:
         gnt = np.array(g).T
         Y,g = phenotype.remove_missing(y,g.T,options.verbose)
         G = g.T
@@ -193,11 +198,11 @@ elif cmd == 'kinship':
     sys.stderr.write(options.geno+"\n")
     k3 = round(K3[0][0],4)
     if options.geno == 'data/small.geno':
-        assert k1==0.7939, "k1=%f" % k1
+        assert k1==0.8, "k1=%f" % k1
         assert k2==0.7939, "k2=%f" % k2
         assert k3==0.7939, "k3=%f" % k3
     if options.geno == 'data/small_na.geno':
-        assert k1==0.7172, "k1=%f" % k1
+        assert k1==0.8333, "k1=%f" % k1
         assert k2==0.7172, "k2=%f" % k2
         assert k3==0.7172, "k3=%f" % k3
     if options.geno == 'data/test8000.geno':
-- 
cgit 1.4.1


From 42c94e94414da06d85dbb6f53e77eb660624b5d8 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 16:35:20 +0300
Subject: README: Started release information and versioning of module

---
 wqflask/wqflask/my_pylmm/README.md         | 9 +++++++++
 wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 1 +
 2 files changed, 10 insertions(+)
 create mode 100644 wqflask/wqflask/my_pylmm/README.md

diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
new file mode 100644
index 00000000..0c809610
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -0,0 +1,9 @@
+# RELEASE NOTES
+
+## 0.50-gn2-pre1
+
+1. This is the first test release of multi-core pylmm into GN2. Both
+   Kinship K calculation and GWAS have been made multi-threaded by
+   introducing the Python multiprocessing module. Note that only
+   run_other has been updated to use the new routines.
+  
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index e69de29b..c40c3221 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -0,0 +1 @@
+PYLMM_VERSION="0.50-gn2-pre1"
-- 
cgit 1.4.1


From c4132c7ec5e0d720c4229563c9edddbcfc2e7fa6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 17:03:53 +0300
Subject: GWAS: sort results so we always get the same list

---
 wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index ce7842f7..b901c0e2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -110,13 +110,13 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
    # PS = []
    # TS = []
    count = 0
+   job = 0
    jobs_running = 0
    for snp in G:
       snp_id = (snp,'SNPID')
       count += 1
-      # print count,snp_id
       if count % 1000 == 0:
-         job = count/1000
+         job += 1
          if verbose:
             sys.stderr.write("Job %d At SNP %d\n" % (job,count))
          if numThreads == 1:
@@ -126,7 +126,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
             j,lst = q.get()
             if verbose:
                sys.stderr.write("Job "+str(j)+" finished\n")
-            res.append(lst)
+            res.append((j,lst))
          else:
             p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
             jobs_running += 1
@@ -136,7 +136,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
                   j,lst = q.get_nowait()
                   if verbose:
                      sys.stderr.write("Job "+str(j)+" finished\n")
-                  res.append(lst)
+                  res.append((j,lst))
                   jobs_running -= 1
                except Queue.Empty:
                   time.sleep(0.1)
@@ -149,21 +149,25 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
       collect.append(snp_id)
 
    if numThreads==1 or count<1000 or len(collect)>0:
-      print "Running on 1 THREAD"
-      compute_snp(count/1000,n,collect,lmm2,reml,q)
+      job += 1
+      print "Collect final batch size %i job %i @%i: " % (len(collect), job, count)
+      compute_snp(job,n,collect,lmm2,reml,q)
       collect = []
       j,lst = q.get()
-      res.append(lst)
+      res.append((j,lst))
    print "count=",count," running=",jobs_running," collect=",len(collect)
    for job in range(jobs_running):
       j,lst = q.get(True,15) # time out
       if verbose:
          sys.stderr.write("Job "+str(j)+" finished\n")
-      res.append(lst)
-
-   if verbose:
-      print "res=",res[0][0:10]
-   print [len(res1) for res1 in res]
-   ts = [item[0] for res1 in res for item in res1]
-   ps = [item[1] for res1 in res for item in res1]
+      res.append((j,lst))
+
+   print "Before sort",[res1[0] for res1 in res]
+   res = sorted(res,key=lambda x: x[0])
+   # if verbose:
+   #    print "res=",res[0][0:10]
+   print "After sort",[res1[0] for res1 in res]
+   print [len(res1[1]) for res1 in res]
+   ts = [item[0] for j,res1 in res for item in res1]
+   ps = [item[1] for j,res1 in res for item in res1]
    return ts,ps
-- 
cgit 1.4.1


From e615b83d34bcc991e671241db71c6ad0c0267479 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 17 Mar 2015 17:15:56 +0300
Subject: Release 0.50-gn2-pre1

---
 wqflask/wqflask/my_pylmm/README.md | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index 0c809610..f6b0e72d 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,9 +1,21 @@
 # RELEASE NOTES
 
-## 0.50-gn2-pre1
+## 0.50-gn2-pre1 release
+
+This is the first test release of multi-core pylmm into GN2. Both
+kinship calculation and GWAS have been made multi-threaded by
+introducing the Python multiprocessing module. Note that only
+run_other has been updated to use the new routines (so human is still
+handled the old way). I have taken care that we can still run both
+old-style and new-style LMM (through passing the 'new_code'
+boolean). This could be an option in the web server for users to
+select and test for any unexpected differences (of which there should
+be none, naturally ;).
+
+The current version can handle missing phenotypes, but as they are
+removed there is no way for GN2 to know what SNPs the P-values belong
+to. A future version will pass a SNP index to allow for missing
+phenotypes.
+
 
-1. This is the first test release of multi-core pylmm into GN2. Both
-   Kinship K calculation and GWAS have been made multi-threaded by
-   introducing the Python multiprocessing module. Note that only
-   run_other has been updated to use the new routines.
   
\ No newline at end of file
-- 
cgit 1.4.1


From 5fb4245a3d08f7107cf885639fc275a6d73ea82d Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 6 Apr 2015 15:46:46 +0000
Subject: Removed dataSharing directory and its files since they aren't being
 used, but I should note that a couple of them were changed from GN1; I think
 that there was some attempt to implement this very early into GN2's
 development, but we'd probably be better off starting from scratch since a
 number of things have changed since then (such as the way we call queries)

Removed the Singapore link from the mirrors section of the index page since
it wasn't working
---
 wqflask/wqflask/dataSharing/SharingBody.py         | 290 ---------------------
 wqflask/wqflask/dataSharing/SharingInfo.py         | 146 -----------
 wqflask/wqflask/dataSharing/SharingInfoAddPage.py  |  47 ----
 .../wqflask/dataSharing/SharingInfoDeletePage.py   |  55 ----
 wqflask/wqflask/dataSharing/SharingInfoEditPage.py |  51 ----
 wqflask/wqflask/dataSharing/SharingInfoPage.py     |  64 -----
 .../wqflask/dataSharing/SharingInfoUpdatePage.py   | 109 --------
 .../wqflask/dataSharing/SharingListDataSetPage.py  |  99 -------
 wqflask/wqflask/dataSharing/SharingPage.py         |  40 ---
 wqflask/wqflask/dataSharing/__init__.py            |   0
 wqflask/wqflask/templates/index_page.html          |   1 -
 11 files changed, 902 deletions(-)
 delete mode 100755 wqflask/wqflask/dataSharing/SharingBody.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingInfo.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoAddPage.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoDeletePage.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoEditPage.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoPage.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingListDataSetPage.py
 delete mode 100755 wqflask/wqflask/dataSharing/SharingPage.py
 delete mode 100755 wqflask/wqflask/dataSharing/__init__.py

diff --git a/wqflask/wqflask/dataSharing/SharingBody.py b/wqflask/wqflask/dataSharing/SharingBody.py
deleted file mode 100755
index 880161f5..00000000
--- a/wqflask/wqflask/dataSharing/SharingBody.py
+++ /dev/null
@@ -1,290 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-sharing_body_string = """
-<TD vAlign=top width="100%" align="left" height=10 bgColor=#eeeeee>
-
-                                                        <p style="font-size:18px;font-family:verdana;color:black"><B> Data Set Download</B></p>
-                                                        <Form METHOD="get" ACTION="/webqtl/main.py" name="SEARCHFORM">
-
-                                                                <TABLE width="800" border="0">
-
-                                        <!--  SPECIES  SELECTION -->
-                                                                        <TR>
-                                                                                <TD align=right height="35" style="font-size:14px;font-family:verdana;color:black" width="16%">
-                                                                                        <B>Species:</B>
-                                                                                </TD>
-                                                                                <TD width="3%">
-                                                                                </TD>
-                                                                                <TD NOWRAP width="85%" align="left">
-                                                                                        <DIV Id="menu0">
-                                                                                                <Select NAME="species" size=1 id="species" onchange="fillOptions('species');">
-                                                                                                </Select>
-                                                                                        </DIV>
-                                                                                </TD>
-                                                                        </TR>
-
-                                        <!--  GROUP  SELECTION -->
-                                                                        <TR>
-                                                                                <TD align="right" height="35" style="font-size:14px;font-family:verdana;color:black">
-                                                                                        <B>Group:</B>
-                                                                                </TD>
-                                                                                <TD>
-                                                                                </TD>
-                                                                                <TD NOWRAP width="85%" align="left">
-                                                                                        <DIV Id="menu1">
-
-                                                                                                <Select NAME="cross" size=1 id="cross" onchange="fillOptions('cross');">
-                                                                                                </Select>
-                                                                                        <input type="button" class="button" value=" Info " onCLick="javascript:crossinfo();">
-                                                                                        </DIV>
-                                                                                </TD>
-                                                                        </TR>
-
-                                        <!--  TYPE  SELECTION -->
-                                                                        <TR>
-                                                                                <TD align=right height=35 style="font-size:14px;font-family:verdana;color:black">
-                                                                                        <B>Type:</B>
-                                                                                </TD>
-                                                                                <TD>
-                                                                                </TD>
-                                                                                <TD NOWRAP width="85%" align="left">
-                                                                                        <DIV Id="menu2">
-                                                                                                <Select NAME="tissue" size=1 id="tissue" onchange="fillOptions('tissue');">
-
-                                                                                                </Select>
-                                                                                        </DIV>
-                                                                                </TD>
-                                                                        </TR>
-
-                                        <!--  DATABASE  SELECTION -->
-                                                                        <TR>
-                                                                                <TD align=right height=35 style="font-size:14px;font-family:verdana;color:black">
-                                                                                        <B>Database:</B>
-                                                                                </TD>
-                                                                                <TD>
-                                                                                </TD>
-                                                                                <TD NOWRAP width="85%" align="left">
-                                                                                        <DIV Id="menu3">
-                                                                                                <Select NAME="database" size=1 id="database">
-                                                                                                </Select>
-                                                                                                <input type="button" class="button" value=" Info " onCLick="javascript:databaseinfo();">
-                                                                                        </DIV>
-                                                                                </TD>
-                                                                        </TR>
-
-<!--  SEARCH, MAKE DEFAULT, ADVANCED SEARCH -->
-                                                                        <TR>
-                                                                                <td></td>
-                                                                                <td></td>
-                                                                                <TD ALIGN="left" HEIGHT="40">
-                                                                                        &nbsp;&nbsp;&nbsp;<INPUT TYPE="button" CLASS="button" STYLE="font-size:12px" VALUE="&nbsp;&nbsp;Download&nbsp;&nbsp;" onCLick="javascript:datasetinfo();">
-                                                                                </TD>
-                                                                        </TR>
-                                                                </TABLE>
-
-                                                                <SCRIPT SRC="/javascript/selectDatasetMenu.js"></SCRIPT>
-                                                        </FORM>
-
-                                                        <p style="font-size:18px;font-family:verdana;color:black"><B> GeneNetwork Accession Number</B></p>
-                                                        <form method="get" action="/webqtl/main.py" name="f2" target="_blank">
-                                                                <INPUT TYPE="hidden" NAME="FormID" VALUE="sharinginfo">
-                                                                <TABLE width="800" border="0">
-                                                                        <tr>
-                                                                                <td align=right height="35" style="font-size:14px;font-family:verdana;color:black" width="16%"><b>GN:</b></td>
-                                                                                <td width=3%></td>
-                                                                                <td><input type="text" name="GN_AccessionId" size="40" />&nbsp;&nbsp;E.g. 112</td>
-                                                                        </tr>
-                                                                        <tr>
-                                                                                <td></td>
-                                                                                <td></td>
-                                                                                <td HEIGHT="40">
-                                                                                        &nbsp;&nbsp;&nbsp;<input type="Submit" class="button" STYLE="font-size:12px" VALUE="&nbsp;&nbsp;&nbsp;Submit&nbsp;&nbsp;&nbsp;">
-                                                                                </td>
-                                                                        </tr>
-                                                                </table>
-                                                        </form>
-
-</td>
-"""
-
-sharinginfo_body_string = """<td>
-<a href="/webqtl/main.py?FormID=sharingListDataset">List of DataSets</a><br>
-<H1 class="title" id="parent-fieldname-title">%s
-<a href="/webqtl/main.py?FormID=sharinginfoedit&GN_AccessionId=%s"><img src="/images/modify.gif" alt="modify this page" border="0" valign="middle"></a>
-<span style="color:red;">%s</span>
-</H1>
-<table border="0" width="100%%">
-<tr>
-<td valign="top" width="50%%">
-<TABLE cellSpacing=0 cellPadding=5 width=100%% border=0>
-                      <TR><td><b>GN Accession:</b> GN%s</TD></tr>
-                      <TR><TD><b>GEO Series:</b> %s</TD></TR>
-                      <TR><TD><b>Title:</b> %s</TD></TR>
-                      <TR><TD><b>Organism:</b> <a href=http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=%s>%s</a></TD></tr>
-                      <tr><TD><b>Group:</b> %s</TD></TR>
-                      <TR><TD><b>Tissue:</b> %s</TD></tr>
-                      <tr><TD><b>Dataset Status:</b> %s</TD></tr>
-                      <TR><TD><b>Platforms:</b> %s</TD></TR>
-                      <TR><TD><b>Normalization:</b> %s</TD></TR>
-                      <TR><TD><!--Code below to Show hide Contact information -->
-                       <a href="#" onclick="colapse('answer1')">See Contact Information</a><br>
-                       <span id="answer1" style="display: none; return: false;">
-                                           %s<br>
-                       %s<br>
-                       %s<br>
-                       %s<br>
-                       %s, %s %s %s<br>
-                       Tel. %s<br>
-                       %s<br>
-                       <a href="%s">%s</a>
-                       </span><!--Code above to Show hide Contact information --></TD></TR>
-</TABLE>
-</td>
-<td valign="top" width="50%%">
-<table border="0" width="100%%">
-<tr>
-        <td bgcolor="#dce4e1"><b>Download datasets and supplementary data files</b></td>
-</tr>
-<tr>
-        <td>%s</td>
-</tr>
-</table>
-</td>
-</tr>
-</table>
-<HR>
-<p>
-<table width="100%%" border="0" cellpadding="5" cellspacing="0">
-<tr><td><span style="font-size:115%%;font-weight:bold;">Summary:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">About the cases used to generate this set of data:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">About the tissue used to generate this set of data:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">About downloading this data set:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">About the array platform:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">About data values and data processing:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Data source acknowledgment:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Experiment Type:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Overall Design:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Contributor:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Citation:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Submission Date:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Laboratory:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-<tr><td><span style="font-size:115%%;font-weight:bold;">Samples:</span></td></tr>
-        <tr><td> %s<br><br></td></tr>
-</table>
-</p>
-</td>
-"""
-
-sharinginfoedit_body_string = """<td>
-<H1 class="title">%s</H1>
-<script language="javascript">
-function CheckGNAccesionId(){
-        if (document.sharinginfoupdate.GN_AccesionId.value.length  ==  0){
-                alert("Please input GN Accesion Id");
-                document.sharinginfoupdate.GN_AccesionId.focus();
-                return false;
-        } else {
-                return true;
-        }
-}
-</script>
-<table border="0" CELLSPACING="0" CELLPADDING="8">
-<form name="sharinginfoupdate" method="post" action="/webqtl/main.py?FormID=sharinginfoupdate" onsubmit="return CheckGNAccesionId();">
-<input type="hidden" name="Id" value="%s">
-
-  <tr><TH COLSPAN=2><h2 class="title">Principal Investigator</h2></TH></tr>
-   <tr><td align="right" width="100"><b>Contact Name:</b></td><td width="200"><input type='text' name='Contact_Name' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Emails:</b></td><td><input type='text' name='Emails' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Phone:</b></td><td><input type='text' name='Phone' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>URL:</b></td><td><input type='text' name='URL' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Organization Name:</b></td><td><input type='text' name='Organization_Name' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Department:</b></td><td><input type='text' name='Department' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Laboratory:</b></td><td><input type='text' name='Laboratory' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Address:</b></td><td><input type='text' name='Street' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>City:</b></td><td><input type='text' name='City' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>State:</b></td><td><input type='text' name='State' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>ZIP:</b></td><td><input type='text' name='ZIP' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Country:</b></td><td><input type='text' name='Country' size='90' value='%s'></td></tr>
-
-  <tr><TH COLSPAN=2><h2 class="title">Summary</h2></TH></tr>
-  <tr><td align="right"><b>Summary: </b></td><td><TEXTAREA NAME="Summary" cols="77" rows="15">%s</textarea></td></tr>
-
-  <tr><TH COLSPAN=2><h2 class="title">Biology</h2></TH></tr>
-  <tr><td align="right"><b>Experiment Design:</b></td><td><TEXTAREA NAME="Experiment_Type" cols="77" rows="15">%s</textarea></td></tr>
-  <tr><td align="right"><b>About the cases used to<br>generate this set of data:</b></td><td><TEXTAREA NAME="About_Cases" cols="77" rows="15">%s</textarea></td></tr>
-  <tr><td align="right"><b>About the tissue used to<br>generate this set of data:</b></td><td><TEXTAREA NAME="About_Tissue" cols="77" rows="15">%s</textarea></td></tr>
-
-  <tr><TH COLSPAN=2><h2 class="title">Technique</h2></TH></tr>
-  <tr><td align="right"><b>About downloading this data set:</b></td><td><TEXTAREA NAME="About_Download" cols="77" rows="15">%s</textarea></td></tr>
-  <tr><td align="right"><b>About the array platform:</b></td><td><TEXTAREA NAME="About_Array_Platform" cols="77" rows="15">%s</textarea></td></tr>
-
-  <tr><TH COLSPAN=2><h2 class="title">Bioinformatics</h2></TH></tr>
-  <tr><td align="right"><b>About data values and<br>data processing:</b></td><td><TEXTAREA NAME="About_Data_Values_Processing" cols="77" rows="15">%s</textarea></td></tr>
-  <tr><td align="right"><b>Overall Design:</b></td><td><TEXTAREA NAME="Overall_Design" cols="77" rows="15">%s</textarea></td></tr>
-
-  <tr><TH COLSPAN=2><h2 class="title">Misc</h2></TH></tr>
-  <tr><td align="right"><b>Contributor:</b></td><td><TEXTAREA NAME="Contributor" cols="77" rows="15">%s</textarea></td></tr>
-  <tr><td align="right"><b>Citation:</b></td><td><TEXTAREA NAME="Citation" cols="77" rows="5">%s</textarea></td></tr>
-  <tr><td align="right"><b>Data source acknowledgment:</b></td><td><TEXTAREA NAME="Data_Source_Acknowledge" cols="77" rows="15">%s</textarea></td></tr>
-
-  <tr><TH COLSPAN=2><h2 class="title">Administrator ONLY</h2></TH></tr>
-  <tr><td align="right"><b>GN Accesion Id:</b></td><td><input type='text' name='GN_AccesionId' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>DB Title in GN:</b></td><td><input type='text' name='InfoPageTitle' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>GEO Series:</b></td><td><input type='text' name='GEO_Series' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Status:</b></td><td><input type='text' name='Status' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Title:</b></td><td><input type='text' name='Title' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Organism_Id (Taxonomy ID):</b></td><td><input type='text' name='Organism_Id' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Organism:</b></td><td><input type='text' name='Organism' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Submission Date:</b></td><td><input type='text' name='Submission_Date' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Platforms:</b></td><td><input type='text' name='Platforms' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Species:</b></td><td><input type='text' name='Species' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Tissue:</b></td><td><input type='text' name='Tissue' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Normalization:</b></td><td><input type='text' name='Normalization' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Inbred Set:</b></td><td><input type='text' name='InbredSet' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Info Page Name:</b></td><td><input type='text' name='InfoPageName' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Samples:</b></td><td><input type='text' name='Samples' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Authorized Users:</b></td><td><input type='text' name='AuthorizedUsers' size='90' value='%s'></td></tr>
-   <tr><td align="right"><b>Progress:</b></td><td><input type='text' name='Progress' size='90' value='%s'></td></tr>
-
-  <tr><td> <colspan='2' align="center"><input type="Submit" class="button" style="font-size:12px" value="  Submit  "></td></tr>
-
-</form>
-</table>
-</td>"""
diff --git a/wqflask/wqflask/dataSharing/SharingInfo.py b/wqflask/wqflask/dataSharing/SharingInfo.py
deleted file mode 100755
index 41a75222..00000000
--- a/wqflask/wqflask/dataSharing/SharingInfo.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from __future__ import print_function, division
-
-from pprint import pformat as pf
-from collections import namedtuple
-
-import requests
-
-from dbFunction import webqtlDatabaseFunction
-import SharingBody
-
-#import logging
-#logging.basicConfig(filename="/tmp/flask_gn_log", level=logging.INFO)
-#
-#_log = logging.getLogger("search")
-#_ch = logging.StreamHandler()
-#_log.addHandler(_ch)
-
-
-
-#########################################
-#      Sharing Info
-#########################################
-class SharingInfo(object):
-
-    def __init__(self, GN_AccessionId, InfoPageName):
-        print("In SharingInfo")
-        self.GN_AccessionId = GN_AccessionId
-        self.InfoPageName = InfoPageName
-
-    def getInfo(self):
-        cursor = webqtlDatabaseFunction.getCursor()
-        if (not cursor):
-            return
-
-        field_names = """Id, GEO_Series, Status, Title, Organism, Experiment_Type,
-                Summary, Overall_Design, Contributor, Citation, Submission_Date,
-                Contact_Name, Emails, Phone, URL, Organization_Name, Department,
-                Laboratory, Street, City, State, ZIP, Country, Platforms,
-                Samples, Species, Normalization, InbredSet, InfoPageName,
-                DB_Name, Organism_Id, InfoPageTitle, GN_AccesionId, Tissue,
-                AuthorizedUsers, About_Cases, About_Tissue, About_Download,
-                About_Array_Platform, About_Data_Values_Processing,
-                Data_Source_Acknowledge, Progreso """
-
-        # We can use string interpolation here cause we own the string
-        sql = """select %s from InfoFiles where """ % (field_names)
-        if self.GN_AccessionId:
-            sql += "GN_AccesionId = %s"
-            cursor.execute(sql, self.GN_AccessionId)
-        elif self.InfoPageName:
-            sql += "InfoPageName = %s"
-            cursor.execute(sql, self.InfoPageName)
-        else:
-            raise Exception('No correct parameter found')
-        info = cursor.fetchone()
-
-        info = todict(field_names, info)
-
-        # fetch datasets file list
-        filelist = []
-        if info["GN_AccesionId"]:
-            url = "http://atlas.uthsc.edu/scandatasets.php?GN_AccesionId=%s" % (
-                info["GN_AccesionId"])
-            try:
-                response = requests.get(url)
-            except Exception as why:
-                log.exception("Problem conneting to:", url)
-            if response:
-                data = response.text
-                filelist = data.split()
-
-        return info, filelist
-
-
-    def getBody(self, infoupdate=""):
-        info, filelist = self.getInfo()
-        if filelist:
-            htmlfilelist = '<ul style="line-height:160%;">\n'
-            for i in range(len(filelist)):
-                if i%2==0:
-                    filename = filelist[i]
-                    filesize = filelist[i+1]
-                    htmlfilelist += "<li>"
-                    htmlfilelist += '<a href="ftp://atlas.uthsc.edu/users/shared/Genenetwork/GN%s/%s">%s</a>' % (self.GN_AccessionId, filename, filename)
-                    htmlfilelist += '&nbsp;&nbsp;&nbsp;'
-                    #r=re.compile(r'(?<=\d)(?=(\d\d\d)+(?!\d))')
-                    #htmlfilelist += '[%s&nbsp;B]' % r.sub(r',',filesize)
-                    if 12 < len(filesize):
-                        filesize=filesize[0:-12]
-                        filesize += ' T'
-                    elif 9 < len(filesize):
-                        filesize=filesize[0:-9]
-                        filesize += ' G'
-                    elif 6 < len(filesize):
-                        filesize=filesize[0:-6]
-                        filesize += ' M'
-                    elif 3 < len(filesize):
-                        filesize=filesize[0:-3]
-                        filesize += ' K'
-                    htmlfilelist += '[%sB]' % filesize
-                    htmlfilelist += "</li>\n"
-            htmlfilelist += "</ul>"
-        else:
-            htmlfilelist = "Data sets are not available or are not public yet."
-
-        return info, htmlfilelist
-        #return SharingBody.sharinginfo_body_string % (info[31], info[32], infoupdate, info[32], info[1], info[3], info[30], info[4], info[27], info[33], info[2], info[23], info[26], info[11], info[15], info[16], info[18], info[19], info[20], info[21], info[22], info[13], info[12], info[14], info[14], htmlfilelist, info[6], info[35], info[36], info[37], info[38], info[39], info[40], info[5], info[7], info[8], info[9], info[10], info[17], info[24])
-
-
-def todict(fields, values):
-    """Converts sql results into a user friendly dictionary"""
-    new_dict = {}
-    fields = fields.split(",")
-    for counter, field in enumerate(fields):
-        field = field.strip()
-        value = values[counter]
-        if isinstance(value, str):
-            value = unicode(value, "utf-8")
-        new_dict[field] = value
-    return new_dict
diff --git a/wqflask/wqflask/dataSharing/SharingInfoAddPage.py b/wqflask/wqflask/dataSharing/SharingInfoAddPage.py
deleted file mode 100755
index 452fb474..00000000
--- a/wqflask/wqflask/dataSharing/SharingInfoAddPage.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from base.templatePage import templatePage
-from base import webqtlConfig
-import SharingBody
-import SharingInfo
-
-
-#########################################
-#      Sharing Info Edit Page
-#########################################
-class SharingInfoAddPage(templatePage):
-
-    def __init__(self, fd=None):
-        templatePage.__init__(self, fd)
-        if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']:
-            pass
-        else:
-            heading = "Adding Info"
-            detail = ["You don't have the permission to add new dataset"]
-            self.error(heading=heading,detail=detail,error="Error")
-            return
-        self.dict['body'] = SharingBody.sharinginfoedit_body_string % ("Add new dataset", "-1", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "")
diff --git a/wqflask/wqflask/dataSharing/SharingInfoDeletePage.py b/wqflask/wqflask/dataSharing/SharingInfoDeletePage.py
deleted file mode 100755
index a9c785c6..00000000
--- a/wqflask/wqflask/dataSharing/SharingInfoDeletePage.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from base.templatePage import templatePage
-from base import webqtlConfig
-from dbFunction import webqtlDatabaseFunction
-import SharingBody
-import SharingInfo
-
-
-#########################################
-#      Sharing Info Delete Page
-#########################################
-class SharingInfoDeletePage(templatePage):
-
-    def __init__(self, fd=None):
-        templatePage.__init__(self, fd)
-        if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']:
-            pass
-        else:
-            heading = "Deleting Info"
-            detail = ["You don't have the permission to delete this dataset"]
-            self.error(heading=heading,detail=detail,error="Error")
-            return
-        cursor = webqtlDatabaseFunction.getCursor()
-        if (not cursor):
-            return
-        GN_AccessionId = fd.formdata.getvalue('GN_AccessionId')
-        sql = "delete from InfoFiles where GN_AccesionId=%s"
-        cursor.execute(sql, GN_AccessionId)
-        re = cursor.fetchone()
-        self.dict['body'] = "Delete dataset info record (GN_AccesionId=%s) successfully." % GN_AccessionId
diff --git a/wqflask/wqflask/dataSharing/SharingInfoEditPage.py b/wqflask/wqflask/dataSharing/SharingInfoEditPage.py
deleted file mode 100755
index c5f4ed22..00000000
--- a/wqflask/wqflask/dataSharing/SharingInfoEditPage.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from base.templatePage import templatePage
-from base import webqtlConfig
-import SharingBody
-import SharingInfo
-
-
-#########################################
-#      Sharing Info Edit Page
-#########################################
-class SharingInfoEditPage(templatePage):
-
-    def __init__(self, fd=None):
-        templatePage.__init__(self, fd)
-        if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']:
-            pass
-        else:
-            heading = "Editing Info"
-            detail = ["You don't have the permission to edit this dataset"]
-            self.error(heading=heading,detail=detail,error="Error")
-            return
-        GN_AccessionId = fd.formdata.getvalue('GN_AccessionId')
-        InfoPageName = fd.formdata.getvalue('InfoPageName')
-        sharingInfoObject = SharingInfo.SharingInfo(GN_AccessionId, InfoPageName)
-        info, filelist = sharingInfoObject.getInfo()
-        self.dict['body'] = SharingBody.sharinginfoedit_body_string % (info[31], info[0], info[11], info[12], info[13], info[14], info[15], info[16], info[17], info[18], info[19], info[20], info[21], info[22], info[6], info[5], info[35], info[36], info[37], info[38], info[39], info[7], info[8], info[9], info[40], info[32], info[31], info[1], info[2], info[3], info[30], info[4], info[10], info[23], info[25], info[33], info[26], info[27], info[28], info[24], info[34], info[41])
diff --git a/wqflask/wqflask/dataSharing/SharingInfoPage.py b/wqflask/wqflask/dataSharing/SharingInfoPage.py
deleted file mode 100755
index c32eec50..00000000
--- a/wqflask/wqflask/dataSharing/SharingInfoPage.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from __future__ import print_function, division
-
-from pprint import pformat as pf
-
-import urlparse
-
-import flask
-
-from base import webqtlConfig
-from dbFunction import webqtlDatabaseFunction
-import SharingBody
-import SharingInfo
-
-
-#########################################
-#      Sharing Info Page
-#########################################
-class SharingInfoPage():
-
-    def __init__(self, fd):
-        self.redirect_url = None   # Set if you want a redirect
-        print("fd is:", pf(fd.__dict__))
-        # Todo: Need a [0] in line below????d
-        GN_AccessionId = fd.get('GN_AccessionId')   # Used under search datasharing
-        InfoPageName = fd.get('database')  # Might need to add a [0]
-        cursor = webqtlDatabaseFunction.getCursor()
-        if InfoPageName and not GN_AccessionId:
-            sql = "select GN_AccesionId from InfoFiles where InfoPageName = %s"
-            cursor.execute(sql, InfoPageName)
-            GN_AccessionId = cursor.fetchone()
-            self.redirect_url = urlparse.urljoin(webqtlConfig.ROOT_URL, "/data_sharing?GN_AccessionId=%s" % GN_AccessionId)
-            #self.redirect_url = flask.url_for('data_sharing', GN_AccessionId=GN_AccessionId[0])
-            print("set self.redirect_url")
-            #print("before redirect")
-            #return flask.redirect(url)
-            #print("after redirect")
-        else:
-            CauseError
diff --git a/wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py b/wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py
deleted file mode 100755
index 181f2eed..00000000
--- a/wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-import MySQLdb
-
-from base.templatePage import templatePage
-from base import webqtlConfig
-from dbFunction import webqtlDatabaseFunction
-import SharingBody
-import SharingInfo
-
-#########################################
-#      Sharing Info Update Page
-#########################################
-class SharingInfoUpdatePage(templatePage):
-
-    def __init__(self, fd=None):
-        templatePage.__init__(self, fd)
-        if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']:
-            pass
-        else:
-            heading = "Editing Info"
-            detail = ["You don't have the permission to modify this file"]
-            self.error(heading=heading,detail=detail,error="Error")
-            return
-        cursor = webqtlDatabaseFunction.getCursor()
-        if (not cursor):
-            return
-        Id=fd.formdata.getvalue('Id')
-        GN_AccesionId=fd.formdata.getvalue('GN_AccesionId')
-        GEO_Series=fd.formdata.getvalue('GEO_Series')
-        Status=fd.formdata.getvalue('Status')
-        Title=fd.formdata.getvalue('Title')
-        Organism_Id=fd.formdata.getvalue('Organism_Id')
-        Organism=fd.formdata.getvalue('Organism')
-        Experiment_Type =fd.formdata.getvalue('Experiment_Type')
-        Summary=fd.formdata.getvalue('Summary')
-        Overall_Design=fd.formdata.getvalue('Overall_Design')
-        Contributor=fd.formdata.getvalue('Contributor')
-        Citation=fd.formdata.getvalue('Citation')
-        Submission_Date=fd.formdata.getvalue('Submission_Date')
-        Contact_Name=fd.formdata.getvalue('Contact_Name')
-        Emails=fd.formdata.getvalue('Emails')
-        Phone=fd.formdata.getvalue('Phone')
-        URL=fd.formdata.getvalue('URL')
-        Organization_Name=fd.formdata.getvalue('Organization_Name')
-        Department=fd.formdata.getvalue('Department')
-        Laboratory=fd.formdata.getvalue('Laboratory')
-        Street=fd.formdata.getvalue('Street')
-        City=fd.formdata.getvalue('City')
-        State=fd.formdata.getvalue('State')
-        ZIP=fd.formdata.getvalue('ZIP')
-        Country=fd.formdata.getvalue('Country')
-        Platforms=fd.formdata.getvalue('Platforms')
-        Samples=fd.formdata.getvalue('Samples')
-        Species=fd.formdata.getvalue('Species')
-        Tissue=fd.formdata.getvalue('Tissue')
-        Normalization=fd.formdata.getvalue('Normalization')
-        InbredSet=fd.formdata.getvalue('InbredSet')
-        InfoPageName=fd.formdata.getvalue('InfoPageName')
-        InfoPageTitle=fd.formdata.getvalue('InfoPageTitle')
-        About_Cases=fd.formdata.getvalue('About_Cases')
-        About_Tissue=fd.formdata.getvalue('About_Tissue')
-        About_Download=fd.formdata.getvalue('About_Download')
-        About_Array_Platform=fd.formdata.getvalue('About_Array_Platform')
-        About_Data_Values_Processing=fd.formdata.getvalue('About_Data_Values_Processing')
-        Data_Source_Acknowledge=fd.formdata.getvalue('Data_Source_Acknowledge')
-        AuthorizedUsers=fd.formdata.getvalue('AuthorizedUsers')
-        Progress=fd.formdata.getvalue('Progress')
-        if Id=='-1':
-            sharingInfoObject = SharingInfo.SharingInfo(GN_AccesionId, InfoPageName)
-            info, filelist = sharingInfoObject.getInfo()
-            if info:
-                heading = "Editing Info"
-                detail = ["The new dataset info record is duplicate."]
-                self.error(heading=heading, detail=detail, error="Error")
-                return
-            sql = """INSERT INTO InfoFiles SET GN_AccesionId=%s, GEO_Series=%s, Status=%s, Title=%s, Organism_Id=%s, Organism=%s, Experiment_Type=%s, Summary=%s, Overall_Design=%s, Contributor=%s, Citation=%s, Submission_Date=%s, Contact_Name=%s, Emails=%s, Phone=%s, URL=%s, Organization_Name=%s, Department=%s, Laboratory=%s, Street=%s, City=%s, State=%s, ZIP=%s, Country=%s, Platforms=%s, Samples=%s, Species=%s, Tissue=%s, Normalization=%s, InbredSet=%s, InfoPageName=%s, InfoPageTitle=%s, About_Cases=%s, About_Tissue=%s, About_Download=%s, About_Array_Platform=%s, About_Data_Values_Processing=%s, Data_Source_Acknowledge=%s, AuthorizedUsers=%s, Progreso=%s"""
-            cursor.execute(sql, tuple([GN_AccesionId, GEO_Series, Status, Title, Organism_Id, Organism, Experiment_Type, Summary, Overall_Design, Contributor, Citation, Submission_Date, Contact_Name, Emails, Phone, URL, Organization_Name, Department, Laboratory, Street, City, State, ZIP, Country, Platforms, Samples, Species, Tissue, Normalization, InbredSet, InfoPageName, InfoPageTitle, About_Cases, About_Tissue, About_Download, About_Array_Platform, About_Data_Values_Processing, Data_Source_Acknowledge, AuthorizedUsers, Progress]))
-            infoupdate="This record has been succesfully added."
-        else:
-            sql = """UPDATE InfoFiles SET GN_AccesionId=%s, GEO_Series=%s, Status=%s, Title=%s, Organism_Id=%s, Organism=%s, Experiment_Type=%s, Summary=%s, Overall_Design=%s, Contributor=%s, Citation=%s, Submission_Date=%s, Contact_Name=%s, Emails=%s, Phone=%s, URL=%s, Organization_Name=%s, Department=%s, Laboratory=%s, Street=%s, City=%s, State=%s, ZIP=%s, Country=%s, Platforms=%s, Samples=%s, Species=%s, Tissue=%s, Normalization=%s, InbredSet=%s, InfoPageName=%s, InfoPageTitle=%s, About_Cases=%s, About_Tissue=%s, About_Download=%s, About_Array_Platform=%s, About_Data_Values_Processing=%s, Data_Source_Acknowledge=%s, AuthorizedUsers=%s, Progreso=%s WHERE Id=%s"""
-            cursor.execute(sql, tuple([GN_AccesionId, GEO_Series, Status, Title, Organism_Id, Organism, Experiment_Type, Summary, Overall_Design, Contributor, Citation, Submission_Date, Contact_Name, Emails, Phone, URL, Organization_Name, Department, Laboratory, Street, City, State, ZIP, Country, Platforms, Samples, Species, Tissue, Normalization, InbredSet, InfoPageName, InfoPageTitle, About_Cases, About_Tissue, About_Download, About_Array_Platform, About_Data_Values_Processing, Data_Source_Acknowledge, AuthorizedUsers, Progress, Id]))
-            infoupdate="This record has been succesfully updated."
-        sharingInfoObject = SharingInfo.SharingInfo(GN_AccesionId, InfoPageName)
-        self.dict['body'] = sharingInfoObject.getBody(infoupdate=infoupdate)
diff --git a/wqflask/wqflask/dataSharing/SharingListDataSetPage.py b/wqflask/wqflask/dataSharing/SharingListDataSetPage.py
deleted file mode 100755
index 8685ac65..00000000
--- a/wqflask/wqflask/dataSharing/SharingListDataSetPage.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from htmlgen import HTMLgen2 as HT
-from base import webqtlConfig
-
-from base.templatePage import templatePage
-
-
-#########################################
-#      Sharing List DataSet Page
-#########################################
-class SharingListDataSetPage(templatePage):
-
-    def __init__(self, fd=None):
-        templatePage.__init__(self, fd)
-
-        if not self.openMysql():
-            return
-
-        if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']:
-            pass
-        else:
-            heading = "Editing Info"
-            detail = ["You don't have the permission to list the datasets"]
-            self.error(heading=heading,detail=detail,error="Error")
-            return
-
-
-        TD_LR = HT.TD(height=200,width="100%",bgColor='#eeeeee')
-
-        query = """select GN_AccesionId, InfoPageTitle, Progreso from InfoFiles order by GN_AccesionId"""
-        self.cursor.execute(query)
-        result = self.cursor.fetchall()
-
-        heading = HT.Paragraph('Dataset Table', Class="title")
-
-        newrecord = HT.Href(text="New Record", url="/webqtl/main.py?FormID=sharinginfoadd")
-
-        info = "Click the accession id to view the dataset info. Click the dataset name to edit the dataset info."
-
-        datasetTable = HT.TableLite(border=0, cellpadding=0, cellspacing=0, Class="collap", width="100%")
-
-        tableHeaderRow = HT.TR()
-        tableHeaderRow.append(HT.TD("Accession Id", Class='fs14 fwb ffl b1 cw cbrb', align="center"))
-        tableHeaderRow.append(HT.TD("Dataset name", Class='fs14 fwb ffl b1 cw cbrb', align="center"))
-        tableHeaderRow.append(HT.TD("Progress", Class='fs14 fwb ffl b1 cw cbrb', align="center"))
-        tableHeaderRow.append(HT.TD("Operation", Class='fs14 fwb ffl b1 cw cbrb', align="center"))
-        datasetTable.append(tableHeaderRow)
-
-        for one_row in result:
-            Accession_Id, InfoPage_title, Progress = one_row
-            datasetRow = HT.TR()
-            datasetRow.append(HT.TD(HT.Href(text="GN%s" % Accession_Id, url="/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=%s" % Accession_Id, Class='fs12 fwn'), Class="fs12 fwn b1 c222"))
-            datasetRow.append(HT.TD(HT.Href(text="%s" % InfoPage_title, url="/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=%s" % Accession_Id, Class='fs12 fwn'), Class="fs12 fwn b1 c222"))
-            datasetRow.append(HT.TD("%s" % Progress, Class='fs12 fwn ffl b1 c222'))
-            operation_edit = HT.Href(text="Edit", url="/webqtl/main.py?FormID=sharinginfoedit&GN_AccessionId=%s" % Accession_Id)
-            operation_delete = HT.Href(text="Delete", onClick="deleteRecord(%s); return false;" % Accession_Id)
-            operation = HT.TD(Class="fs12 fwn b1 c222", align="center")
-            operation.append(operation_edit)
-            operation.append("&nbsp;&nbsp;&nbsp;&nbsp;")
-            operation.append(operation_delete)
-            datasetRow.append(operation)
-            datasetTable.append(datasetRow)
-
-        TD_LR.append(heading, HT.P(), newrecord, HT.P(), info, HT.P(), datasetTable)
-
-        js1 = """       <script language="javascript" type="text/javascript">
-                                        function deleteRecord(id){
-                                                question = confirm("Are you sure you want to delete the dataset info record (Accession Id="+id+")?")
-                                                if (question != "0"){
-                                                        window.open("/webqtl/main.py?FormID=sharinginfodelete&GN_AccessionId="+id, "_self");
-                                                }
-                                        }
-                                        </script>"""
-        self.dict['js1'] = js1
-        self.dict['body'] =  str(TD_LR)
diff --git a/wqflask/wqflask/dataSharing/SharingPage.py b/wqflask/wqflask/dataSharing/SharingPage.py
deleted file mode 100755
index b244a6bd..00000000
--- a/wqflask/wqflask/dataSharing/SharingPage.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-from base.templatePage import templatePage
-import SharingBody
-
-#########################################
-#      SharingPage
-#########################################
-
-class SharingPage(templatePage):
-
-    def __init__(self, fd):
-        templatePage.__init__(self, fd)
-        self.dict['title'] = 'GeneNetwork Data Sharing Zone'
-        self.dict['body'] = SharingBody.sharing_body_string
-        self.dict['js2'] = 'onload="javascript:initialDatasetSelection();"'
diff --git a/wqflask/wqflask/dataSharing/__init__.py b/wqflask/wqflask/dataSharing/__init__.py
deleted file mode 100755
index e69de29b..00000000
diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html
index ba017bfb..5e0a92e3 100755
--- a/wqflask/wqflask/templates/index_page.html
+++ b/wqflask/wqflask/templates/index_page.html
@@ -211,7 +211,6 @@
                         <li><a href="http://gn.genetics.ucla.edu/" target="_blank" style="font-size:12px;font-family:verdana;color:blue">California at UCLA</a></li>
                         <li><a href="http://genenetwork.helmholtz-hzi.de/" target="_blank" style="font-size:12px;font-family:verdana;color:blue">Germany at the HZI</a></li>
                         <li><a href="http://genenetwork.memphis.edu/" target="_blank" style="font-size:12px;font-family:verdana;color:blue">Memphis at the U of M</a></li>
-                        <li><a href="http://webqtl.bic.nus.edu.sg/" target="_blank" style="font-size:12px;font-family:verdana;color:blue">Singapore at the NUS</a></li>
                         <li><a href="http://genenetwork.epfl.ch/" target="_blank" style="font-size:12px;font-family:verdana;color:blue">Switzerland at the EPFL</a></li>
                     </ul>
                 </section>
-- 
cgit 1.4.1


From 03d43722b043deff164420e07d6c7d5e8c128cd6 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 6 Apr 2015 15:57:11 +0000
Subject: Removed the old news code before pulling over Lei's changes

Removed the SharingPage import from views since it doesn't exist anymore
---
 wqflask/wqflask/views.py                 | 41 +++++++++++---------------------
 wqflask/wqflask/yaml_data/whats_new.yaml | 26 --------------------
 2 files changed, 14 insertions(+), 53 deletions(-)
 delete mode 100755 wqflask/wqflask/yaml_data/whats_new.yaml

diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 7fec0456..b7864041 100755
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -45,8 +45,6 @@ from wqflask.correlation_matrix import show_corr_matrix
 from wqflask.correlation import corr_scatter_plot
 from utility import temp_data
 
-from wqflask.dataSharing import SharingInfo, SharingInfoPage
-
 from base import webqtlFormData
 from utility.benchmark import Bench
 
@@ -100,20 +98,20 @@ def tmp_page(img_path):
                             img_base64 = bytesarray )
 
 
-@app.route("/data_sharing")
-def data_sharing_page():
-    print("In data_sharing")
-    fd = webqtlFormData.webqtlFormData(request.args)
-    print("1Have fd")
-    sharingInfoObject = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None)
-    info, htmlfilelist = sharingInfoObject.getBody(infoupdate="")
-    print("type(htmlfilelist):", type(htmlfilelist))
-    htmlfilelist = htmlfilelist.encode("utf-8")
-    #template_vars = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None)
-    print("1 Made it to rendering")
-    return render_template("data_sharing.html",
-                            info=info,
-                            htmlfilelist=htmlfilelist)
+#@app.route("/data_sharing")
+#def data_sharing_page():
+#    print("In data_sharing")
+#    fd = webqtlFormData.webqtlFormData(request.args)
+#    print("1Have fd")
+#    sharingInfoObject = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None)
+#    info, htmlfilelist = sharingInfoObject.getBody(infoupdate="")
+#    print("type(htmlfilelist):", type(htmlfilelist))
+#    htmlfilelist = htmlfilelist.encode("utf-8")
+#    #template_vars = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None)
+#    print("1 Made it to rendering")
+#    return render_template("data_sharing.html",
+#                            info=info,
+#                            htmlfilelist=htmlfilelist)
 
 
 @app.route("/search", methods=('GET',))
@@ -161,17 +159,6 @@ def help():
     doc = docs.Docs("help")
     return render_template("docs.html", **doc.__dict__)
 
-@app.route("/news")
-def news():
-    #variables = whats_new.whats_new()
-    with open("/home/sam/gene/wqflask/wqflask/yaml_data/whats_new.yaml") as fh:
-        contents = fh.read()
-        yamilized = yaml.safe_load(contents)
-        news_items = yamilized['news']
-    for news_item in news_items:
-        print("\nnews_item is: %s\n" % (news_item))
-    return render_template("whats_new.html", news_items=news_items)
-
 @app.route("/references")
 def references():
     doc = docs.Docs("references")
diff --git a/wqflask/wqflask/yaml_data/whats_new.yaml b/wqflask/wqflask/yaml_data/whats_new.yaml
deleted file mode 100755
index 8f41a8f2..00000000
--- a/wqflask/wqflask/yaml_data/whats_new.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-news:
--
-    date: 2012-1-20
-    title: Mouse SNPs from dbSNP have been added to GeneNetwork
-    details: 
-        10 million mouse SNPs from dbSNP (build 128) have been added to Variant Browser.
-        They could be searched by name (e.g. rs31192936) (Implemented by Xiaodong Zhou and Ning Liu).
-
--
-    date: 2012-1-20
-    title: Literature correlation has been update to 2011 version
-    details: 
-        Dr. Ramin Homayouni and Dr. Lijing Xu kindly provide the 2011 version of mouse gene-gene
-        literature correlation matrix to GeneNetwork. (Implemented by Xiaodong Zhou).
-
--
-    date: 2012-1-16
-    title: Expression data set for EPFL/LISP BXD Muscle Affy Mouse Gene 1.0 ST (Dec11) RMA ** has been entered in GeneNetwork
-    details: 
-        Laboratory of Integrative and Systems Physiology
-        (<a href="http://auwerx-lab.epfl.ch/">LISP</a>).
-        This data set is not yet freely available for global analysis.
-        This data set has not yet been used or described in any publication.
-        Please contact Johan Auwerx or Evan Williams at evan.williams@epfl.ch
-        regarding use of these data. (Implemented by J Auwerx, E Williams, LA Rose,
-        RW Williams and A Centeno).
\ No newline at end of file
-- 
cgit 1.4.1


From 4ad55a0ebc1fc77da8e9aa6ec565b2b10c373e9b Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 6 Apr 2015 21:30:13 +0000
Subject: Just committing a couple minor bug fixes to get code working after
 pulling Pjotr's latest changes

---
 wqflask/wqflask/marker_regression/marker_regression.py | 2 +-
 wqflask/wqflask/my_pylmm/pyLMM/lmm.py                  | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 3fb9915b..7708356b 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -701,7 +701,7 @@ class MarkerRegression(object):
             no_val_samples = self.identify_empty_samples()
             trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
             
-            genotype_matrix = np.array(trimmed_genotype_data).T
+            genotype_matrix = np.array(genotype_data).T
 
             #print("pheno_vector: ", pf(pheno_vector))
             #print("genotype_matrix: ", pf(genotype_matrix))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 58ff809b..53689071 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -43,13 +43,14 @@ Redis = Redis()
 
 import sys
 sys.path.append("/home/zas1024/gene/wqflask/")
-sys.path.append("/home/danny/GeneNetwork/wqflask/wqflask/my_pylmm/pyLMM/")
-print("sys.path2:", sys.path)
 
 has_gn2=True
 
 from utility.benchmark import Bench
 from utility import temp_data
+
+sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/")
+
 from kinship import kinship, kinship_full, kvakve
 import genotype
 import phenotype
-- 
cgit 1.4.1