From 5ec961a719246d5082814c8eb49ee3f3d7cea11e Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 18 Dec 2014 16:16:46 +0000 Subject: Committer: Lei Yan On branch master --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9a509baa..b2c841ad 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ *.bak *~ web/new_genotypes/HSNIH.json +wqflask/secure_server.py -- cgit v1.2.3 From 4ea8b8a5efc848c99767894cbdbef2f135e3b83c Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 18 Dec 2014 16:51:47 +0000 Subject: Committer: Lei Yan On branch master --- web/images/edit.png | Bin 0 -> 4322 bytes wqflask/wqflask/docs.py | 1 + wqflask/wqflask/templates/docedit.html | 20 ++++++++++++++++++++ wqflask/wqflask/templates/docs.html | 5 +++++ wqflask/wqflask/templates/testhtmleditor.html | 20 -------------------- wqflask/wqflask/views.py | 7 ++++--- 6 files changed, 30 insertions(+), 23 deletions(-) create mode 100644 web/images/edit.png create mode 100755 wqflask/wqflask/templates/docedit.html delete mode 100755 wqflask/wqflask/templates/testhtmleditor.html diff --git a/web/images/edit.png b/web/images/edit.png new file mode 100644 index 00000000..dda50521 Binary files /dev/null and b/web/images/edit.png differ diff --git a/wqflask/wqflask/docs.py b/wqflask/wqflask/docs.py index 65255987..07b0b81a 100755 --- a/wqflask/wqflask/docs.py +++ b/wqflask/wqflask/docs.py @@ -11,5 +11,6 @@ class Docs(object): WHERE Docs.entry LIKE '%s' """ result = g.db.execute(sql % (entry)).fetchone() + self.entry = entry self.title = result[0] self.content = result[1] diff --git a/wqflask/wqflask/templates/docedit.html b/wqflask/wqflask/templates/docedit.html new file mode 100755 index 00000000..1a9e8ca8 --- /dev/null +++ b/wqflask/wqflask/templates/docedit.html @@ -0,0 +1,20 @@ +{% extends "base.html" %} + +{% block title %}Edit: {{title}}{% endblock %} + +{% block content %} +
+

Edit: {{title}}

+
+ + + +
+
+{% endblock %} diff --git a/wqflask/wqflask/templates/docs.html b/wqflask/wqflask/templates/docs.html index cbaf1e70..08f95721 100755 --- a/wqflask/wqflask/templates/docs.html +++ b/wqflask/wqflask/templates/docs.html @@ -5,6 +5,11 @@ {% block content %}

{{title}}

+
+ + + +
{{content|safe}}
{% endblock %} diff --git a/wqflask/wqflask/templates/testhtmleditor.html b/wqflask/wqflask/templates/testhtmleditor.html deleted file mode 100755 index 1d258d0e..00000000 --- a/wqflask/wqflask/templates/testhtmleditor.html +++ /dev/null @@ -1,20 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Test html editor{% endblock %} - -{% block content %} -
-

Test HTML Editor

-
- - - -
-
-{% endblock %} diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index cdf93147..deb566ba 100755 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -134,9 +134,10 @@ def search_page(): else: return render_template("search_result_page.html", **result) -@app.route("/testhtmleditor") -def testhtmleditor_page(): - return render_template("testhtmleditor.html") +@app.route("/docedit") +def docedit(): + doc = docs.Docs(request.args['entry']) + return render_template("docedit.html", **doc.__dict__) @app.route("/help") def help(): -- cgit v1.2.3 From 723fcb3723d16e4ef476b0fc7471f7a770ed01be Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Fri, 13 Feb 2015 18:18:49 +0000 Subject: Committer: Lei Yan On branch master --- wqflask/wqflask/news.py | 16 ++++++++++++++++ wqflask/wqflask/templates/news.html | 21 +++++++++++++++++++++ wqflask/wqflask/views.py | 13 ++++--------- 3 files changed, 41 insertions(+), 9 deletions(-) create mode 100755 wqflask/wqflask/news.py create mode 100755 wqflask/wqflask/templates/news.html diff --git a/wqflask/wqflask/news.py b/wqflask/wqflask/news.py new file mode 100755 index 00000000..62dc1bbb --- /dev/null +++ b/wqflask/wqflask/news.py @@ -0,0 +1,16 @@ +from __future__ import absolute_import, print_function, division +import sys +reload(sys) +sys.setdefaultencoding('utf8') +from flask import g + +class News(object): + + def __init__(self): + sql = """ + SELECT News.id, News.date, News.details + FROM News + order by News.date desc + """ + self.title = "GeneNetwork News" + self.newslist = g.db.execute(sql).fetchall() diff --git a/wqflask/wqflask/templates/news.html b/wqflask/wqflask/templates/news.html new file mode 100755 index 00000000..0a19dcee --- /dev/null +++ b/wqflask/wqflask/templates/news.html @@ -0,0 +1,21 @@ +{% extends "base.html" %} + +{% block title %}{{title}}{% endblock %} + +{% block content %} +
+

{{title}}

+ + + {% for newsitem in newslist %} + + + + + {% endfor %} + +
+ {{newsitem.date}} + {{newsitem.details|safe}}
+
+{% endblock %} diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index deb566ba..7c0f4e14 100755 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -30,6 +30,7 @@ from flask import (render_template, request, make_response, Response, from wqflask import search_results from wqflask import docs +from wqflask import news from base.data_set import DataSet # Used by YAML in marker_regression from base.data_set import create_datasets_list from wqflask.show_trait import show_trait @@ -145,15 +146,9 @@ def help(): return render_template("docs.html", **doc.__dict__) @app.route("/news") -def news(): - #variables = whats_new.whats_new() - with open("/home/sam/gene/wqflask/wqflask/yaml_data/whats_new.yaml") as fh: - contents = fh.read() - yamilized = yaml.safe_load(contents) - news_items = yamilized['news'] - for news_item in news_items: - print("\nnews_item is: %s\n" % (news_item)) - return render_template("whats_new.html", news_items=news_items) +def news_route(): + newsobject = news.News() + return render_template("news.html", **newsobject.__dict__) @app.route("/references") def references(): -- cgit v1.2.3 From 0e21a58cfe7ca1ec6c1d84c43e1d2fd2d0b9104c Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Fri, 13 Feb 2015 19:54:00 +0000 Subject: Committer: Lei Yan On branch master --- wqflask/wqflask/templates/news.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/templates/news.html b/wqflask/wqflask/templates/news.html index 0a19dcee..4f0032b8 100755 --- a/wqflask/wqflask/templates/news.html +++ b/wqflask/wqflask/templates/news.html @@ -5,11 +5,11 @@ {% block content %}

{{title}}

- +
{% for newsitem in newslist %} - -- cgit v1.2.3 From dbc048f06a50837f59ce4d2ee7cfe84ab8e6062f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 15 Mar 2015 14:41:14 +0300 Subject: Comment --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 5aa27106..1fa7a895 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -334,7 +334,7 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): inds (columns) by snps (rows). """ G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) - return kinship(G.T),G + return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): """ -- cgit v1.2.3 From 8463812f328973190b5a5160bdcff47a7f1cd12d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 16 Mar 2015 12:41:43 +0300 Subject: Refactoring eigenvalue KvaKve --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 20 ++++++++++++++++++++ wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 13 +++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 28f2042d..bb4f5ace 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -21,10 +21,12 @@ import sys import os import numpy as np +from scipy import linalg import multiprocessing as mp # Multiprocessing is part of the Python stdlib import Queue import time + from optmatrix import matrix_initialize, matrixMultT def kinship_full(G): @@ -153,5 +155,23 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): # np.savetxt(outFile+".kve",Kve) return K +def kvakve(K, verbose=True): + """ + Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned + of small values < 1e-6 (notably smaller than zero) + """ + if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + + Kva,Kve = linalg.eigh(K) + if verbose: + print("self.Kva is: ", Kva.shape, Kva) + print("self.Kve is: ", Kve.shape, Kve) + + if sum(Kva < 1e-6): + if verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(Kva < 1e-6))) + Kva[Kva < 1e-6] = 1e-6 + return Kva,Kve + + diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 1fa7a895..17fe952a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -49,7 +49,7 @@ has_gn2=True from utility.benchmark import Bench from utility import temp_data -from kinship import kinship, kinship_full +from kinship import kinship, kinship_full, kvakve import genotype try: @@ -532,11 +532,12 @@ class LMM: print("this K is:", K.shape, pf(K)) if len(Kva) == 0 or len(Kve) == 0: - if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - Kva,Kve = linalg.eigh(K) - end = time.time() - if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + # begin = time.time() + # Kva,Kve = linalg.eigh(K) + # end = time.time() + # if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + Kva,Kve = kvakve(K) self.K = K self.Kva = Kva -- cgit v1.2.3 From 3cff3091379a2e8835027c272ada0d8d4314624e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 16 Mar 2015 13:15:21 +0300 Subject: Output added --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 16 +++++++++------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 1 + 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index bb4f5ace..cd2445f1 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -164,8 +164,8 @@ def kvakve(K, verbose=True): Kva,Kve = linalg.eigh(K) if verbose: - print("self.Kva is: ", Kva.shape, Kva) - print("self.Kve is: ", Kve.shape, Kve) + print("Kva is: ", Kva.shape, Kva) + print("Kve is: ", Kve.shape, Kve) if sum(Kva < 1e-6): if verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(Kva < 1e-6))) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 17fe952a..9fd05b42 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -533,11 +533,12 @@ class LMM: if len(Kva) == 0 or len(Kve) == 0: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - # begin = time.time() + begin = time.time() # Kva,Kve = linalg.eigh(K) - # end = time.time() - # if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) Kva,Kve = kvakve(K) + end = time.time() + if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) self.K = K self.Kva = Kva @@ -547,10 +548,11 @@ class LMM: self.Y = Y self.X0 = X0 self.N = self.K.shape[0] - - if sum(self.Kva < 1e-6): - if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) - self.Kva[self.Kva < 1e-6] = 1e-6 + + # ----> Below moved to kinship.kvakve(K) + # if sum(self.Kva < 1e-6): + # if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) + # self.Kva[self.Kva < 1e-6] = 1e-6 self.transform() diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 6bb79856..cef0cdc4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -120,6 +120,7 @@ if cmd == 'redis': G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt) print np.array(ps) + print sum(ps) # Test results p1 = round(ps[0],4) p2 = round(ps[-1],4) -- cgit v1.2.3 From 62f6db77b22e1fb28b3355d75a30f9ecf6c94d95 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 16 Mar 2015 15:34:38 +0300 Subject: Adding multi-core GWAS --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 213 ++++++++++++++++++++++++++++++ wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 4 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 62 +++++++-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 8 +- 4 files changed, 271 insertions(+), 16 deletions(-) create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gwas.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py new file mode 100644 index 00000000..52455014 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -0,0 +1,213 @@ +# pylmm-based GWAS calculation +# +# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +#!/usr/bin/python + +import pdb +import time +import sys +# from utility import temp_data +import lmm + + +import os +import numpy as np +import input +from optmatrix import matrix_initialize +# from lmm import LMM + +import multiprocessing as mp # Multiprocessing is part of the Python stdlib +import Queue + +def compute_snp(j,snp_ids,q = None): + # print(j,len(snp_ids),"\n") + result = [] + for snp_id in snp_ids: + # j,snp_id = collect + snp,id = snp_id + # id = collect[1] + # result = [] + # Check SNPs for missing values + x = snp[keep].reshape((n,1)) # all the SNPs + v = np.isnan(x).reshape((-1,)) + if v.sum(): + # NOTE: this code appears to be unreachable! + if verbose: + sys.stderr.write("Found missing values in "+str(x)) + keeps = True - v + xs = x[keeps,:] + if keeps.sum() <= 1 or xs.var() <= 1e-6: + # PS.append(np.nan) + # TS.append(np.nan) + # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) + # continue + result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) + continue + + # Its ok to center the genotype - I used normalizeGenotype to + # force the removal of missing genotypes as opposed to replacing them with MAF. + if not normalizeGenotype: + xs = (xs - xs.mean()) / np.sqrt(xs.var()) + Ys = Y[keeps] + X0s = X0[keeps,:] + Ks = K[keeps,:][:,keeps] + if kfile2: + K2s = K2[keeps,:][:,keeps] + Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=verbose,K2=K2s) + else: + Ls = LMM(Ys,Ks,X0=X0s,verbose=verbose) + if refit: + Ls.fit(X=xs,REML=REML) + else: + #try: + Ls.fit(REML=REML) + #except: pdb.set_trace() + ts,ps,beta,betaVar = Ls.association(xs,REML=REML,returnBeta=True) + else: + if x.var() == 0: + # Note: this code appears to be unreachable! + + # PS.append(np.nan) + # TS.append(np.nan) + # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values + result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) + continue + + if refit: + L.fit(X=x,REML=REML) + # This is where it happens + ts,ps,beta,betaVar = L.association(x,REML=REML,returnBeta=True) + result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)) + # compute_snp.q.put([j,formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)]) + # print [j,result[0]]," in result queue\n" + if not q: + q = compute_snp.q + q.put([j,result]) + return j + # PS.append(ps) + # TS.append(ts) + # return len(result) + # compute.q.put(result) + # return None + +def f_init(q): + compute_snp.q = q + +def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): + """ + Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows) + """ + matrix_initialize() + cpu_num = mp.cpu_count() + numThreads = 1 + kfile2 = False + + sys.stderr.write(str(G.shape)+"\n") + n = G.shape[1] # inds + inds = n + m = G.shape[0] # snps + snps = m + sys.stderr.write(str(m)+" SNPs\n") + # print "***** GWAS: G",G.shape,G + assert m>n, "n should be larger than m (snps>inds)" + + # CREATE LMM object for association + # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose) + # else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=verbose,K2=K2) + + runlmm = lmm.LMM(Y,K) # ,Kva,Kve,X0,verbose=verbose) + if not refit: + if verbose: sys.stderr.write("Computing fit for null model\n") + runlmm.fit() # follow GN model in run_other + if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (runlmm.optH,runlmm.optSigma)) + + # outFile = "test.out" + # out = open(outFile,'w') + out = sys.stderr + + def outputResult(id,beta,betaSD,ts,ps): + out.write(formatResult(id,beta,betaSD,ts,ps)) + def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") + def formatResult(id,beta,betaSD,ts,ps): + return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" + + printOutHead() + + # Set up the pool + # mp.set_start_method('spawn') + q = mp.Queue() + p = mp.Pool(numThreads, f_init, [q]) + collect = [] + + # Buffers for pvalues and t-stats + # PS = [] + # TS = [] + count = 0 + + completed = 0 + last_j = 0 + # for snp_id in G: + for snp in G: + print snp.shape,snp + snp_id = ('SNPID',snp) + count += 1 + if count % 1000 == 0: + job = count/1000 + if verbose: + sys.stderr.write("Job %d At SNP %d\n" % (job,count)) + if numThreads == 1: + compute_snp(job,collect,q) + collect = [] + j,lines = q.get() + if verbose: + sys.stderr.write("Job "+str(j)+" finished\n") + for line in lines: + out.write(line) + else: + p.apply_async(compute_snp,(job,collect)) + collect = [] + while job > completed: + try: + j,lines = q.get_nowait() + if verbose: + sys.stderr.write("Job "+str(j)+" finished\n") + for line in lines: + out.write(line) + completed += 1 + except Queue.Empty: + pass + if job > completed + cpu_num + 5: + time.sleep(1) + else: + if job >= completed: + break + + collect.append(snp_id) + + if not numThreads or numThreads > 1: + for job in range(int(count/1000)-completed): + j,lines = q.get(True,15) # time out + if verbose: + sys.stderr.write("Job "+str(j)+" finished\n") + for line in lines: + out.write(line) + + # print collect + ps = [item[1][3] for item in collect] + ts = [item[1][2] for item in collect] + print ps + return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index cd2445f1..00f48939 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -26,7 +26,6 @@ import multiprocessing as mp # Multiprocessing is part of the Python stdlib import Queue import time - from optmatrix import matrix_initialize, matrixMultT def kinship_full(G): @@ -87,12 +86,13 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): m = G.shape[0] # snps snps = m sys.stderr.write(str(m)+" SNPs\n") - assert m>n, "n should be larger than m (snps>inds)" + assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) cpu_num = mp.cpu_count() print "CPU cores:",cpu_num + print snps,computeSize iterations = snps/computeSize+1 # if testing: # iterations = 8 diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 9fd05b42..8c6d3c3c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -51,6 +51,7 @@ from utility.benchmark import Bench from utility import temp_data from kinship import kinship, kinship_full, kvakve import genotype +import gwas try: from wqflask.my_pylmm.pyLMM import chunks @@ -253,7 +254,7 @@ def human_association(snp, # refit=False, # temp_data=None): -def run_other(pheno_vector, +def run_other_old(pheno_vector, genotype_matrix, restricted_max_likelihood=True, refit=False, @@ -269,7 +270,7 @@ def run_other(pheno_vector, """ - print("In run_other") + print("In run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) @@ -277,9 +278,51 @@ def run_other(pheno_vector, print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) + # with Bench("Create LMM object"): + # lmm_ob = LMM(pheno_vector, kinship_matrix) + + # with Bench("LMM_ob fitting"): + # lmm_ob.fit() + + print("genotype_matrix: ", genotype_matrix.shape) + print(genotype_matrix) + + with Bench("Doing GWAS"): + t_stats, p_values = GWAS(pheno_vector, + genotype_matrix, + kinship_matrix, + restricted_max_likelihood=True, + refit=False, + temp_data=tempdata) + Bench().report() + return p_values, t_stats + +def run_other_new(pheno_vector, + genotype_matrix, + restricted_max_likelihood=True, + refit=False, + tempdata=None # <---- can not be None + ): + + """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics + + restricted_max_likelihood -- whether to use restricted max likelihood; True or False + refit -- whether to refit the variance component for each marker + temp_data -- TempData object that stores the progress for each major step of the + calculations ("calculate_kinship" and "GWAS" take the majority of time) + + """ + + print("In run_other (new)") + print("REML=",restricted_max_likelihood," REFIT=",refit) + with Bench("Calculate Kinship"): + kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) + + print("kinship_matrix: ", pf(kinship_matrix)) + print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) + with Bench("Create LMM object"): lmm_ob = LMM(pheno_vector, kinship_matrix) - with Bench("LMM_ob fitting"): lmm_ob.fit() @@ -287,15 +330,15 @@ def run_other(pheno_vector, print(genotype_matrix) with Bench("Doing GWAS"): - t_stats, p_values = GWAS(pheno_vector, - genotype_matrix, - kinship_matrix, - restricted_max_likelihood=True, - refit=False, - temp_data=tempdata) + t_stats, p_values = gwas.gwas(pheno_vector, + genotype_matrix.T, + kinship_matrix, + restricted_max_likelihood=True, + refit=False,verbose=True) Bench().report() return p_values, t_stats +run_other = run_other_old def matrixMult(A,B): @@ -821,4 +864,3 @@ if __name__ == '__main__': print("Run from runlmm.py instead") - diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index cef0cdc4..f17f1bd1 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship +from lmm import gn2_load_redis, calculate_kinship_old from kinship import kinship, kinship_full import genotype import phenotype @@ -151,17 +151,17 @@ elif cmd == 'kinship': gnt = None if options.test_kinship: - K = kinship_full(G) + K = kinship_full(np.copy(G)) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2 = calculate_kinship(np.copy(G.T),temp_data=None) + K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(np.copy(G),options) + K3 = kinship(G.T) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) -- cgit v1.2.3 From cc419336f559a51ed03a669d19042e6fd21355ed Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 16 Mar 2015 17:34:40 +0300 Subject: Adding GWAS code --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 99 ++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 44 ++- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 405 ++++++++++++++++++++++++++++ wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 26 +- 5 files changed, 490 insertions(+), 86 deletions(-) create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index 52455014..b9d5db61 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -21,79 +21,30 @@ import pdb import time import sys # from utility import temp_data -import lmm - +import lmm2 import os import numpy as np import input from optmatrix import matrix_initialize -# from lmm import LMM +from lmm2 import LMM2 import multiprocessing as mp # Multiprocessing is part of the Python stdlib import Queue -def compute_snp(j,snp_ids,q = None): - # print(j,len(snp_ids),"\n") +def formatResult(id,beta,betaSD,ts,ps): + return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" + +def compute_snp(j,n,snp_ids,lmm2,REML,q = None): + # print(j,snp_ids,"\n") result = [] for snp_id in snp_ids: - # j,snp_id = collect snp,id = snp_id - # id = collect[1] - # result = [] - # Check SNPs for missing values - x = snp[keep].reshape((n,1)) # all the SNPs - v = np.isnan(x).reshape((-1,)) - if v.sum(): - # NOTE: this code appears to be unreachable! - if verbose: - sys.stderr.write("Found missing values in "+str(x)) - keeps = True - v - xs = x[keeps,:] - if keeps.sum() <= 1 or xs.var() <= 1e-6: - # PS.append(np.nan) - # TS.append(np.nan) - # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) - # continue - result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) - continue - - # Its ok to center the genotype - I used normalizeGenotype to - # force the removal of missing genotypes as opposed to replacing them with MAF. - if not normalizeGenotype: - xs = (xs - xs.mean()) / np.sqrt(xs.var()) - Ys = Y[keeps] - X0s = X0[keeps,:] - Ks = K[keeps,:][:,keeps] - if kfile2: - K2s = K2[keeps,:][:,keeps] - Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=verbose,K2=K2s) - else: - Ls = LMM(Ys,Ks,X0=X0s,verbose=verbose) - if refit: - Ls.fit(X=xs,REML=REML) - else: - #try: - Ls.fit(REML=REML) - #except: pdb.set_trace() - ts,ps,beta,betaVar = Ls.association(xs,REML=REML,returnBeta=True) - else: - if x.var() == 0: - # Note: this code appears to be unreachable! - - # PS.append(np.nan) - # TS.append(np.nan) - # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values - result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) - continue - - if refit: - L.fit(X=x,REML=REML) - # This is where it happens - ts,ps,beta,betaVar = L.association(x,REML=REML,returnBeta=True) + x = snp.reshape((n,1)) # all the SNPs + # if refit: + # L.fit(X=snp,REML=REML) + ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)) - # compute_snp.q.put([j,formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)]) - # print [j,result[0]]," in result queue\n" if not q: q = compute_snp.q q.put([j,result]) @@ -113,8 +64,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): """ matrix_initialize() cpu_num = mp.cpu_count() - numThreads = 1 + numThreads = None kfile2 = False + reml = restricted_max_likelihood sys.stderr.write(str(G.shape)+"\n") n = G.shape[1] # inds @@ -123,17 +75,17 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): snps = m sys.stderr.write(str(m)+" SNPs\n") # print "***** GWAS: G",G.shape,G - assert m>n, "n should be larger than m (snps>inds)" + assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose) # else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=verbose,K2=K2) - runlmm = lmm.LMM(Y,K) # ,Kva,Kve,X0,verbose=verbose) + lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose) if not refit: if verbose: sys.stderr.write("Computing fit for null model\n") - runlmm.fit() # follow GN model in run_other - if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (runlmm.optH,runlmm.optSigma)) + lmm2.fit() # follow GN model in run_other + if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma)) # outFile = "test.out" # out = open(outFile,'w') @@ -142,8 +94,6 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): def outputResult(id,beta,betaSD,ts,ps): out.write(formatResult(id,beta,betaSD,ts,ps)) def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") - def formatResult(id,beta,betaSD,ts,ps): - return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" printOutHead() @@ -162,15 +112,15 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): last_j = 0 # for snp_id in G: for snp in G: - print snp.shape,snp - snp_id = ('SNPID',snp) + snp_id = (snp,'SNPID') count += 1 if count % 1000 == 0: job = count/1000 if verbose: sys.stderr.write("Job %d At SNP %d\n" % (job,count)) if numThreads == 1: - compute_snp(job,collect,q) + print "Running on 1 THREAD" + compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lines = q.get() if verbose: @@ -178,7 +128,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): for line in lines: out.write(line) else: - p.apply_async(compute_snp,(job,collect)) + p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) collect = [] while job > completed: try: @@ -205,6 +155,13 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): sys.stderr.write("Job "+str(j)+" finished\n") for line in lines: out.write(line) + else: + print "Running on 1 THREAD" + # print collect + compute_snp(count/1000,n,collect,lmm2,reml,q) + j,lines = q.get() + for line in lines: + out.write(line) # print collect ps = [item[1][3] for item in collect] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8c6d3c3c..c42f9fb7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -51,6 +51,7 @@ from utility.benchmark import Bench from utility import temp_data from kinship import kinship, kinship_full, kvakve import genotype +import phenotype import gwas try: @@ -315,32 +316,45 @@ def run_other_new(pheno_vector, print("In run_other (new)") print("REML=",restricted_max_likelihood," REFIT=",refit) + + # Adjust phenotypes + Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True) + print("Removed missing phenotypes",Y.shape) + # if options.maf_normalization: + # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) + # print "MAF replacements: \n",G + # if not options.skip_genotype_normalization: + # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) + with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) + K,G = calculate_kinship(G, tempdata) - print("kinship_matrix: ", pf(kinship_matrix)) - print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) + print("kinship_matrix: ", pf(K)) + print("kinship_matrix.shape: ", pf(K.shape)) - with Bench("Create LMM object"): - lmm_ob = LMM(pheno_vector, kinship_matrix) - with Bench("LMM_ob fitting"): - lmm_ob.fit() + # with Bench("Create LMM object"): + # lmm_ob = lmm2.LMM2(Y,K) + # with Bench("LMM_ob fitting"): + # lmm_ob.fit() - print("genotype_matrix: ", genotype_matrix.shape) - print(genotype_matrix) + print("genotype_matrix: ", G.shape) + print(G) with Bench("Doing GWAS"): - t_stats, p_values = gwas.gwas(pheno_vector, - genotype_matrix.T, - kinship_matrix, + t_stats, p_values = gwas.gwas(Y, + G.T, + K, restricted_max_likelihood=True, refit=False,verbose=True) Bench().report() return p_values, t_stats -run_other = run_other_old +run_other = run_other_new def matrixMult(A,B): + return np.dot(A,B) + +def matrixMult_old(A,B): # If there is no fblas then we will revert to np.dot() @@ -674,11 +688,15 @@ class LMM: optimum. """ + print("H=",H) + print("X=",X) + print("REML=",REML) n = len(self.LLs) HOpt = [] for i in range(1,n-2): if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + print("HOpt=",HOpt) if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py new file mode 100644 index 00000000..cba47a9b --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -0,0 +1,405 @@ +# pylmm is a python-based linear mixed-model solver with applications to GWAS + +# Copyright (C) 2013,2014 Nicholas A. Furlotte (nick.furlotte@gmail.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import sys +import time +import numpy as np +from scipy.linalg import eigh, inv, det +import scipy.stats as stats # t-tests +from scipy import optimize +from optmatrix import matrixMult + +def calculateKinship(W,center=False): + """ + W is an n x m matrix encoding SNP minor alleles. + + This function takes a matrix oF SNPs, imputes missing values with the maf, + normalizes the resulting vectors and returns the RRM matrix. + """ + n = W.shape[0] + m = W.shape[1] + keep = [] + for i in range(m): + mn = W[True - np.isnan(W[:,i]),i].mean() + W[np.isnan(W[:,i]),i] = mn + vr = W[:,i].var() + if vr == 0: continue + + keep.append(i) + W[:,i] = (W[:,i] - mn) / np.sqrt(vr) + + W = W[:,keep] + K = matrixMult(W,W.T) * 1.0/float(m) + if center: + P = np.diag(np.repeat(1,n)) - 1/float(n) * np.ones((n,n)) + S = np.trace(matrixMult(matrixMult(P,K),P)) + K_n = (n - 1)*K / S + return K_n + return K + +def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): + """ + + Performs a basic GWAS scan using the LMM. This function + uses the LMM module to assess association at each SNP and + does some simple cleanup, such as removing missing individuals + per SNP and re-computing the eigen-decomp + + Y - n x 1 phenotype vector + X - n x m SNP matrix (genotype matrix) + K - n x n kinship matrix + Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K + X0 - n x q covariate matrix + REML - use restricted maximum likelihood + refit - refit the variance component for each SNP + + """ + n = X.shape[0] + m = X.shape[1] + prins("Initialize GWAS") + print("genotype matrix n is:", n) + print("genotype matrix m is:", m) + + if X0 == None: + X0 = np.ones((n,1)) + + # Remove missing values in Y and adjust associated parameters + v = np.isnan(Y) + if v.sum(): + keep = True - v + keep = keep.reshape((-1,)) + Y = Y[keep] + X = X[keep,:] + X0 = X0[keep,:] + K = K[keep,:][:,keep] + Kva = [] + Kve = [] + + if len(Y) == 0: + return np.ones(m)*np.nan,np.ones(m)*np.nan + + L = LMM(Y,K,Kva,Kve,X0) + if not refit: L.fit() + + PS = [] + TS = [] + + n = X.shape[0] + m = X.shape[1] + + for i in range(m): + x = X[:,i].reshape((n,1)) + v = np.isnan(x).reshape((-1,)) + if v.sum(): + keep = True - v + xs = x[keep,:] + if xs.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue + + Ys = Y[keep] + X0s = X0[keep,:] + Ks = K[keep,:][:,keep] + Ls = LMM(Ys,Ks,X0=X0s) + if refit: + Ls.fit(X=xs) + else: + Ls.fit() + ts,ps = Ls.association(xs,REML=REML) + else: + if x.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue + + if refit: + L.fit(X=x) + ts,ps = L.association(x,REML=REML) + + PS.append(ps) + TS.append(ts) + + return TS,PS + +class LMM2: + + """ + This is a simple version of EMMA/fastLMM. + The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) + and to optimize this model by finding the maximum-likelihood estimates for the model parameters. + There are three model parameters: heritability (h), covariate coefficients (beta) and the total + phenotypic variance (sigma). + Heritability as defined here is the proportion of the total variance (sigma) that is attributed to + the kinship matrix. + + For simplicity, we assume that everything being input is a numpy array. + If this is not the case, the module may throw an error as conversion from list to numpy array + is not done consistently. + + """ + def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): + + """ + The constructor takes a phenotype vector or array Y of size n. + It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. + If they are not provided, the constructor will calculate them. + X0 is an optional covariate matrix of size n x q, where there are q covariates. + When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ + + if X0 == None: + X0 = np.ones(len(Y)).reshape(len(Y),1) + self.verbose = verbose + + x = True - np.isnan(Y) + x = x.reshape(-1,) + if not x.sum() == len(Y): + if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) + Y = Y[x] + K = K[x,:][:,x] + X0 = X0[x,:] + Kva = [] + Kve = [] + self.nonmissing = x + + if len(Kva) == 0 or len(Kve) == 0: + if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + begin = time.time() + Kva,Kve = eigh(K) + end = time.time() + if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + + self.K = K + self.Kva = Kva + self.Kve = Kve + self.N = self.K.shape[0] + self.Y = Y.reshape((self.N,1)) + self.X0 = X0 + + if sum(self.Kva < 1e-6): + if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) + self.Kva[self.Kva < 1e-6] = 1e-6 + + self.transform() + + def transform(self): + + """ + Computes a transformation on the phenotype vector and the covariate matrix. + The transformation is obtained by left multiplying each parameter by the transpose of the + eigenvector matrix of K (the kinship). + """ + + self.Yt = matrixMult(self.Kve.T, self.Y) + self.X0t = matrixMult(self.Kve.T, self.X0) + self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) + self.q = self.X0t.shape[1] + + def getMLSoln(self,h,X): + + """ + Obtains the maximum-likelihood estimates for the covariate coefficients (beta), + the total variance of the trait (sigma) and also passes intermediates that can + be utilized in other functions. The input parameter h is a value between 0 and 1 and represents + the heritability or the proportion of the total variance attributed to genetics. The X is the + covariate matrix. + """ + + S = 1.0/(h*self.Kva + (1.0 - h)) + Xt = X.T*S + XX = matrixMult(Xt,X) + XX_i = inv(XX) + beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) + Yt = self.Yt - matrixMult(X,beta) + Q = np.dot(Yt.T*S,Yt) + sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) + return beta,sigma,Q,XX_i,XX + + def LL_brent(self,h,X=None,REML=False): + #brent will not be bounded by the specified bracket. + # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. + if h < 0: return 1e6 + return -self.LL(h,X,stack=False,REML=REML)[0] + + def LL(self,h,X=None,stack=True,REML=False): + + """ + Computes the log-likelihood for a given heritability (h). If X==None, then the + default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with + the input X. If stack is false, then X is used in place of X0t in the LL calculation. + REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. + """ + + if X == None: X = self.X0t + elif stack: + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + n = float(self.N) + q = float(X.shape[1]) + beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) + LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) + LL = -0.5 * LL + + if REML: + LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(det(matrixMult(X.T,X))) - np.log(det(XX)) + LL = LL + 0.5*LL_REML_part + + + LL = LL.sum() + return LL,beta,sigma,XX_i + + def getMax(self,H, X=None,REML=False): + + """ + Helper functions for .fit(...). + This function takes a set of LLs computed over a grid and finds possible regions + containing a maximum. Within these regions, a Brent search is performed to find the + optimum. + + """ + n = len(self.LLs) + HOpt = [] + for i in range(1,n-2): + if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: + HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + if np.isnan(HOpt[-1]): HOpt[-1] = H[i-1] + #if np.isnan(HOpt[-1]): HOpt[-1] = self.LLs[i-1] + #if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] + + if len(HOpt) > 1: + if self.verbose: sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") + return HOpt[0] + elif len(HOpt) == 1: return HOpt[0] + elif self.LLs[0] > self.LLs[n-1]: return H[0] + else: return H[n-1] + + + def fit(self,X=None,ngrids=100,REML=True): + + """ + Finds the maximum-likelihood solution for the heritability (h) given the current parameters. + X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as + the covariate matrix. + + This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. + Given this optimum, the function computes the LL and associated ML solutions. + """ + + if X == None: X = self.X0t + else: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + H = np.array(range(ngrids)) / float(ngrids) + L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) + self.LLs = L + + hmax = self.getMax(H,X,REML) + L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) + + self.H = H + self.optH = hmax.sum() + self.optLL = L + self.optBeta = beta + self.optSigma = sigma.sum() + + return hmax,beta,sigma,L + + def association(self,X,h=None,stack=True,REML=True,returnBeta=False): + """ + Calculates association statitics for the SNPs encoded in the vector X of size n. + If h == None, the optimal h stored in optH is used. + + """ + if False: + print "X=",X + print "h=",h + print "q=",self.q + print "self.Kve=",self.Kve + print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack + + if stack: + # X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + m = matrixMult(self.Kve.T,X) + # print "m=",m + m = m[:,0] + self.X0t_stack[:,(self.q)] = m + X = self.X0t_stack + + if h == None: h = self.optH + + L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) + q = len(beta) + ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) + + if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma + return ts,ps + + def tstat(self,beta,var,sigma,q,log=False): + + """ + Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. + This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. + """ + + ts = beta / np.sqrt(var * sigma) + #ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) + # sf == survival function - this is more accurate -- could also use logsf if the precision is not good enough + if log: + ps = 2.0 + (stats.t.logsf(np.abs(ts), self.N-q)) + else: + ps = 2.0*(stats.t.sf(np.abs(ts), self.N-q)) + if not len(ts) == 1 or not len(ps) == 1: + raise Exception("Something bad happened :(") + return ts.sum(),ps.sum() + + def plotFit(self,color='b-',title=''): + + """ + Simple function to visualize the likelihood space. It takes the LLs + calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. + The resulting "probabilities" are normalized to one and plotted against heritability. + This can be seen as an approximation to the posterior distribuiton of heritability. + + For diagnostic purposes this lets you see if there is one distinct maximum or multiple + and what the variance of the parameter looks like. + """ + import matplotlib.pyplot as pl + + mx = self.LLs.max() + p = np.exp(self.LLs - mx) + p = p/p.sum() + + pl.plot(self.H,p,color) + pl.xlabel("Heritability") + pl.ylabel("Probability of data") + pl.title(title) + + def meanAndVar(self): + + mx = self.LLs.max() + p = np.exp(self.LLs - mx) + p = p/p.sum() + + mn = (self.H * p).sum() + vx = ((self.H - mn)**2 * p).sum() + + return mn,vx + diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index bb620052..682ba371 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -36,5 +36,5 @@ def remove_missing(y,g,verbose=False): sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) y1 = y[keep] g1 = g[keep,:] - return y1,g1 + return y1,g1,keep diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index f17f1bd1..4268f3be 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -99,13 +99,37 @@ if options.geno: g = tsvreader.geno(options.geno) print g.shape +if cmd == 'redis_new': + # Emulating the redis setup of GN2 + Y = y + G = g + print "Original G",G.shape, "\n", G + + gt = G.T + G = None + ps, ts = gn2_load_redis('testrun','other',k,Y,gt) + print np.array(ps) + print sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + sys.stderr.write(options.geno+"\n") + if options.geno == 'data/small.geno': + assert p1==0.0708, "p1=%f" % p1 + assert p2==0.1417, "p2=%f" % p2 + if options.geno == 'data/small_na.geno': + assert p1==0.0958, "p1=%f" % p1 + assert p2==0.0435, "p2=%f" % p2 + if options.geno == 'data/test8000.geno': + assert p1==0.8984, "p1=%f" % p1 + assert p2==0.9623, "p2=%f" % p2 if cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G if y != None: gnt = np.array(g).T - Y,g = phenotype.remove_missing(y,g.T,options.verbose) + Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) G = g.T print "Removed missing phenotypes",G.shape, "\n", G if options.maf_normalization: -- cgit v1.2.3 From 886934b9cf18cf092c5d6cd0667d860aa30e64b8 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 16 Mar 2015 18:08:00 +0300 Subject: GWAS: results get returned --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 56 +++++++++++++++++++--------------- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 10 +++--- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index b9d5db61..f8d77ab6 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -36,15 +36,17 @@ def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" def compute_snp(j,n,snp_ids,lmm2,REML,q = None): - # print(j,snp_ids,"\n") + # print("COMPUTE SNP",j,snp_ids,"\n") result = [] for snp_id in snp_ids: snp,id = snp_id x = snp.reshape((n,1)) # all the SNPs + # print "X=",x # if refit: # L.fit(X=snp,REML=REML) ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) - result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)) + # result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)) + result.append( (ts,ps) ) if not q: q = compute_snp.q q.put([j,result]) @@ -95,7 +97,8 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): out.write(formatResult(id,beta,betaSD,ts,ps)) def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") - printOutHead() + # printOutHead() + res = [] # Set up the pool # mp.set_start_method('spawn') @@ -114,6 +117,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): for snp in G: snp_id = (snp,'SNPID') count += 1 + # print count,snp_id if count % 1000 == 0: job = count/1000 if verbose: @@ -122,21 +126,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): print "Running on 1 THREAD" compute_snp(job,n,collect,lmm2,reml,q) collect = [] - j,lines = q.get() + j,lst = q.get() if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - for line in lines: - out.write(line) + # for line in lines: + # out.write(line) + res.append(lst) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) collect = [] while job > completed: try: - j,lines = q.get_nowait() + j,lst = q.get_nowait() if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - for line in lines: - out.write(line) + # for line in lines: + # out.write(line) + res.append(lst) completed += 1 except Queue.Empty: pass @@ -148,23 +154,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): collect.append(snp_id) - if not numThreads or numThreads > 1: + if numThreads==1: + print "Running on 1 THREAD" + compute_snp(count/1000,n,collect,lmm2,reml,q) + j,lst = q.get() + # for line in lines: + # out.write(line) + res.append(lst) + else: for job in range(int(count/1000)-completed): - j,lines = q.get(True,15) # time out + j,lst = q.get(True,15) # time out if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - for line in lines: - out.write(line) - else: - print "Running on 1 THREAD" - # print collect - compute_snp(count/1000,n,collect,lmm2,reml,q) - j,lines = q.get() - for line in lines: - out.write(line) - - # print collect - ps = [item[1][3] for item in collect] - ts = [item[1][2] for item in collect] - print ps + res.append(lst) + + # print res + ts = [item[0] for res1 in res for item in res1] + ps = [item[1] for res1 in res for item in res1] + # ts = [item[1] for item in res] + # print ps return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index cba47a9b..d50b0111 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -329,11 +329,11 @@ class LMM2: """ if False: - print "X=",X - print "h=",h - print "q=",self.q - print "self.Kve=",self.Kve - print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack + print "X=",X + print "h=",h + print "q=",self.q + print "self.Kve=",self.Kve + print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack if stack: # X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) -- cgit v1.2.3 From 7268ed46c9bce6fea58b55723c325426bdeeb617 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 10:08:41 +0300 Subject: GWAS multi-core: Tweak queue --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index f8d77ab6..2a472717 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -146,8 +146,8 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): completed += 1 except Queue.Empty: pass - if job > completed + cpu_num + 5: - time.sleep(1) + if job > completed + cpu_num*2: + time.sleep(0.1) else: if job >= completed: break -- cgit v1.2.3 From 7c958796142e9fe0cc3155d10581d68ae2bbc5df Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 10:39:58 +0300 Subject: lmm.py: bring to original state --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index c42f9fb7..994b6be7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -688,15 +688,11 @@ class LMM: optimum. """ - print("H=",H) - print("X=",X) - print("REML=",REML) n = len(self.LLs) HOpt = [] for i in range(1,n-2): if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - print("HOpt=",HOpt) if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] -- cgit v1.2.3 From 46170b2741d006db89ce69128feff51c6e0e7752 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 11:05:30 +0300 Subject: lmm1: fixed a few regressions introduced while debugging --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 42 ++++++++++++++++++++----------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 +-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 00f48939..0c43587e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -168,7 +168,7 @@ def kvakve(K, verbose=True): print("Kve is: ", Kve.shape, Kve) if sum(Kva < 1e-6): - if verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(Kva < 1e-6))) + if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0))) Kva[Kva < 1e-6] = 1e-6 return Kva,Kve diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 994b6be7..2014ffb8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -271,7 +271,7 @@ def run_other_old(pheno_vector, """ - print("In run_other (old)") + print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) @@ -314,7 +314,7 @@ def run_other_new(pheno_vector, """ - print("In run_other (new)") + print("Running the new LMM2 engine in run_other_new") print("REML=",restricted_max_likelihood," REFIT=",refit) # Adjust phenotypes @@ -349,12 +349,10 @@ def run_other_new(pheno_vector, Bench().report() return p_values, t_stats -run_other = run_other_new +# def matrixMult(A,B): +# return np.dot(A,B) def matrixMult(A,B): - return np.dot(A,B) - -def matrixMult_old(A,B): # If there is no fblas then we will revert to np.dot() @@ -772,7 +770,10 @@ class LMM: ts = beta / np.sqrt(var * sigma) ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) - if not len(ts) == 1 or not len(ps) == 1: raise Exception("Something bad happened :(") + if not len(ts) == 1 or not len(ps) == 1: + print("ts=",ts) + print("ps=",ps) + raise Exception("Something bad happened :(") return ts.sum(),ps.sum() def plotFit(self,color='b-',title=''): @@ -798,7 +799,11 @@ class LMM: pl.title(title) -def gn2_redis(key,species): +def gn2_redis(key,species,new_code=True): + """ + Invoke pylmm using Redis as a container. new_code runs the new + version + """ json_params = Redis.get(key) params = json.loads(json_params) @@ -818,11 +823,18 @@ def gn2_redis(key,species): refit = params['refit'], tempdata = tempdata) else: - ps, ts = run_other(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) + if new_code: + ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']), + genotype_matrix = geno, + restricted_max_likelihood = params['restricted_max_likelihood'], + refit = params['refit'], + tempdata = tempdata) + else: + ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']), + genotype_matrix = geno, + restricted_max_likelihood = params['restricted_max_likelihood'], + refit = params['refit'], + tempdata = tempdata) results_key = "pylmm:results:" + params['temp_uuid'] @@ -847,7 +859,7 @@ def gn2_main(): gn2_redis(key,species) -def gn2_load_redis(key,species,kinship,pheno,geno): +def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): print("Loading Redis from parsed data") if kinship == None: k = None @@ -868,7 +880,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gn2_redis(key,species) + return gn2_redis(key,species,new_code) if __name__ == '__main__': print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 4268f3be..1af5a443 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -107,7 +107,7 @@ if cmd == 'redis_new': gt = G.T G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt) + ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) print np.array(ps) print sum(ps) # Test results @@ -142,7 +142,7 @@ if cmd == 'redis': gt = G.T G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt) + ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) print np.array(ps) print sum(ps) # Test results -- cgit v1.2.3 From 73ed2f67bd0478473b887902ae96caaa0fca58d4 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 13:10:49 +0300 Subject: GWAS: one result is missing --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 33 +++++++++++++----------------- wqflask/wqflask/my_pylmm/pyLMM/input.py | 2 ++ wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 10 ++++++--- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 17 ++++++++++------ wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 35 +++++++++++++++++--------------- 5 files changed, 53 insertions(+), 44 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index 2a472717..20083bde 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -66,7 +66,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): """ matrix_initialize() cpu_num = mp.cpu_count() - numThreads = None + numThreads = None # for now use all available threads kfile2 = False reml = restricted_max_likelihood @@ -110,10 +110,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): # PS = [] # TS = [] count = 0 - - completed = 0 - last_j = 0 - # for snp_id in G: + jobs_running = 0 for snp in G: snp_id = (snp,'SNPID') count += 1 @@ -129,13 +126,12 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): j,lst = q.get() if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - # for line in lines: - # out.write(line) res.append(lst) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) + jobs_running += 1 collect = [] - while job > completed: + while jobs_running: try: j,lst = q.get_nowait() if verbose: @@ -143,34 +139,33 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): # for line in lines: # out.write(line) res.append(lst) - completed += 1 + jobs_running -= 1 except Queue.Empty: pass - if job > completed + cpu_num*2: - time.sleep(0.1) + if jobs_running + cpu_num*2 > 0: + time.sleep(1.0) else: - if job >= completed: + if jobs_running > 0: break collect.append(snp_id) - if numThreads==1: + if numThreads==1 or count<1000: print "Running on 1 THREAD" compute_snp(count/1000,n,collect,lmm2,reml,q) j,lst = q.get() - # for line in lines: - # out.write(line) res.append(lst) else: - for job in range(int(count/1000)-completed): + print "count=",count," running=",jobs_running," collect=",len(collect) + for job in range(jobs_running): j,lst = q.get(True,15) # time out if verbose: sys.stderr.write("Job "+str(j)+" finished\n") res.append(lst) - # print res + if verbose: + print "res=",res[0][0:10] + print [len(res1) for res1 in res] ts = [item[0] for res1 in res for item in res1] ps = [item[1] for res1 in res for item in res1] - # ts = [item[1] for item in res] - # print ps return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py index f7b556a5..7063fedf 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/input.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py @@ -135,6 +135,8 @@ class plink: def normalizeGenotype(self,G): # print "Before",G # print G.shape + print "call input.normalizeGenotype" + raise "This should not be used" x = True - np.isnan(G) m = G[x].mean() s = np.sqrt(G[x].var()) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 2014ffb8..8a24d98b 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -285,7 +285,7 @@ def run_other_old(pheno_vector, # with Bench("LMM_ob fitting"): # lmm_ob.fit() - print("genotype_matrix: ", genotype_matrix.shape) + print("run_other_old genotype_matrix: ", genotype_matrix.shape) print(genotype_matrix) with Bench("Doing GWAS"): @@ -320,6 +320,7 @@ def run_other_new(pheno_vector, # Adjust phenotypes Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True) print("Removed missing phenotypes",Y.shape) + # if options.maf_normalization: # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) # print "MAF replacements: \n",G @@ -337,7 +338,7 @@ def run_other_new(pheno_vector, # with Bench("LMM_ob fitting"): # lmm_ob.fit() - print("genotype_matrix: ", G.shape) + print("run_other_new genotype_matrix: ", G.shape) print(G) with Bench("Doing GWAS"): @@ -388,7 +389,9 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ + print("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) + print("call calculate_kinship_new") return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): @@ -399,6 +402,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): normalizes the resulting vectors and returns the RRM matrix. """ + print("call calculate_kinship_old") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] print("genotype 2D matrix n (inds) is:", n) @@ -429,7 +433,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): temp_data.store("percent_complete", percent_complete) genotype_matrix = genotype_matrix[:,keep] - print("genotype_matrix: ", pf(genotype_matrix)) + print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) return kinship_matrix,genotype_matrix diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d50b0111..d4b3ac82 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -22,6 +22,7 @@ from scipy.linalg import eigh, inv, det import scipy.stats as stats # t-tests from scipy import optimize from optmatrix import matrixMult +import kinship def calculateKinship(W,center=False): """ @@ -177,13 +178,17 @@ class LMM2: Kve = [] self.nonmissing = x - if len(Kva) == 0 or len(Kve) == 0: - if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - Kva,Kve = eigh(K) - end = time.time() - if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + print("this K is:", K.shape, K) + if len(Kva) == 0 or len(Kve) == 0: + # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + begin = time.time() + # Kva,Kve = linalg.eigh(K) + Kva,Kve = kinship.kvakve(K) + end = time.time() + if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) + self.K = K self.Kva = Kva self.Kve = Kve diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 1af5a443..708c9185 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -54,9 +54,9 @@ parser.add_option("--geno",dest="geno", parser.add_option("--maf-normalization", action="store_true", dest="maf_normalization", default=False, help="Apply MAF genotype normalization") -parser.add_option("--skip-genotype-normalization", - action="store_true", dest="skip_genotype_normalization", default=False, - help="Skip genotype normalization") +parser.add_option("--genotype-normalization", + action="store_true", dest="genotype_normalization", default=False, + help="Force genotype normalization") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") @@ -100,7 +100,8 @@ if options.geno: print g.shape if cmd == 'redis_new': - # Emulating the redis setup of GN2 + # The main difference between redis_new and redis is that missing + # phenotypes are handled by the first Y = y G = g print "Original G",G.shape, "\n", G @@ -109,7 +110,7 @@ if cmd == 'redis_new': G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) print np.array(ps) - print sum(ps) + print len(ps),sum(ps) # Test results p1 = round(ps[0],4) p2 = round(ps[-1],4) @@ -118,12 +119,14 @@ if cmd == 'redis_new': assert p1==0.0708, "p1=%f" % p1 assert p2==0.1417, "p2=%f" % p2 if options.geno == 'data/small_na.geno': - assert p1==0.0958, "p1=%f" % p1 - assert p2==0.0435, "p2=%f" % p2 + assert p1==0.0897, "p1=%f" % p1 + assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': assert p1==0.8984, "p1=%f" % p1 - assert p2==0.9623, "p2=%f" % p2 -if cmd == 'redis': + assert p2==0.9620, "p2=%f" % p2 + assert sum(ps) == 4070.02346579 + assert len(ps) == 8000 +elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G @@ -135,7 +138,7 @@ if cmd == 'redis': if options.maf_normalization: G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) print "MAF replacements: \n",G - if not options.skip_genotype_normalization: + if options.genotype_normalization: G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) g = None gnt = None @@ -144,8 +147,8 @@ if cmd == 'redis': G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) print np.array(ps) - print sum(ps) - # Test results + print len(ps),sum(ps) + # Test results 4070.02346579 p1 = round(ps[0],4) p2 = round(ps[-1],4) sys.stderr.write(options.geno+"\n") @@ -153,11 +156,11 @@ if cmd == 'redis': assert p1==0.0708, "p1=%f" % p1 assert p2==0.1417, "p2=%f" % p2 if options.geno == 'data/small_na.geno': - assert p1==0.0958, "p1=%f" % p1 - assert p2==0.0435, "p2=%f" % p2 + assert p1==0.0897, "p1=%f" % p1 + assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': assert p1==0.8984, "p1=%f" % p1 - assert p2==0.9623, "p2=%f" % p2 + assert p2==0.8827, "p2=%f" % p2 elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G @@ -169,7 +172,7 @@ elif cmd == 'kinship': if options.maf_normalization: G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) print "MAF replacements: \n",G - if not options.skip_genotype_normalization: + if options.genotype_normalization: G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) g = None gnt = None -- cgit v1.2.3 From 5cee365cc2d8bf8cafca018b9a04e4821616899d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 13:27:38 +0300 Subject: GWAS: should be correct now --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 26 ++++++++++++-------------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 6 +++--- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index 20083bde..ce7842f7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -131,37 +131,35 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) jobs_running += 1 collect = [] - while jobs_running: + while jobs_running > cpu_num: try: j,lst = q.get_nowait() if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - # for line in lines: - # out.write(line) res.append(lst) jobs_running -= 1 except Queue.Empty: + time.sleep(0.1) pass - if jobs_running + cpu_num*2 > 0: + if jobs_running > cpu_num*2: time.sleep(1.0) else: - if jobs_running > 0: - break + break collect.append(snp_id) - if numThreads==1 or count<1000: + if numThreads==1 or count<1000 or len(collect)>0: print "Running on 1 THREAD" compute_snp(count/1000,n,collect,lmm2,reml,q) + collect = [] j,lst = q.get() res.append(lst) - else: - print "count=",count," running=",jobs_running," collect=",len(collect) - for job in range(jobs_running): - j,lst = q.get(True,15) # time out - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") - res.append(lst) + print "count=",count," running=",jobs_running," collect=",len(collect) + for job in range(jobs_running): + j,lst = q.get(True,15) # time out + if verbose: + sys.stderr.write("Job "+str(j)+" finished\n") + res.append(lst) if verbose: print "res=",res[0][0:10] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 708c9185..e301ef1a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -122,9 +122,9 @@ if cmd == 'redis_new': assert p1==0.0897, "p1=%f" % p1 assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - assert p1==0.8984, "p1=%f" % p1 - assert p2==0.9620, "p2=%f" % p2 - assert sum(ps) == 4070.02346579 + # assert p1==0.8984, "p1=%f" % p1 + # assert p2==0.9621, "p2=%f" % p2 + assert round(sum(ps)) == 4070 assert len(ps) == 8000 elif cmd == 'redis': # Emulating the redis setup of GN2 -- cgit v1.2.3 From 62f287b551a3b207eadffb52d6de1e0ef64a1a08 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 16:28:08 +0300 Subject: runlmm: Explicit handling of missing phenotypes --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index e301ef1a..324c4f2c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -57,6 +57,9 @@ parser.add_option("--maf-normalization", parser.add_option("--genotype-normalization", action="store_true", dest="genotype_normalization", default=False, help="Force genotype normalization") +parser.add_option("--remove-missing-phenotypes", + action="store_true", dest="remove_missing_phenotypes", default=False, + help="Remove missing phenotypes") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") @@ -130,11 +133,13 @@ elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G - if y != None: + if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) G = g.T print "Removed missing phenotypes",G.shape, "\n", G + else: + Y = y if options.maf_normalization: G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) print "MAF replacements: \n",G @@ -159,12 +164,12 @@ elif cmd == 'redis': assert p1==0.0897, "p1=%f" % p1 assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - assert p1==0.8984, "p1=%f" % p1 - assert p2==0.8827, "p2=%f" % p2 + assert round(sum(ps)) == 4070 + assert len(ps) == 8000 elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G - if y != None: + if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T Y,g = phenotype.remove_missing(y,g.T,options.verbose) G = g.T @@ -193,11 +198,11 @@ elif cmd == 'kinship': sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) if options.geno == 'data/small.geno': - assert k1==0.7939, "k1=%f" % k1 + assert k1==0.8, "k1=%f" % k1 assert k2==0.7939, "k2=%f" % k2 assert k3==0.7939, "k3=%f" % k3 if options.geno == 'data/small_na.geno': - assert k1==0.7172, "k1=%f" % k1 + assert k1==0.8333, "k1=%f" % k1 assert k2==0.7172, "k2=%f" % k2 assert k3==0.7172, "k3=%f" % k3 if options.geno == 'data/test8000.geno': -- cgit v1.2.3 From 42c94e94414da06d85dbb6f53e77eb660624b5d8 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 16:35:20 +0300 Subject: README: Started release information and versioning of module --- wqflask/wqflask/my_pylmm/README.md | 9 +++++++++ wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 1 + 2 files changed, 10 insertions(+) create mode 100644 wqflask/wqflask/my_pylmm/README.md diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md new file mode 100644 index 00000000..0c809610 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/README.md @@ -0,0 +1,9 @@ +# RELEASE NOTES + +## 0.50-gn2-pre1 + +1. This is the first test release of multi-core pylmm into GN2. Both + Kinship K calculation and GWAS have been made multi-threaded by + introducing the Python multiprocessing module. Note that only + run_other has been updated to use the new routines. + \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index e69de29b..c40c3221 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py @@ -0,0 +1 @@ +PYLMM_VERSION="0.50-gn2-pre1" -- cgit v1.2.3 From c4132c7ec5e0d720c4229563c9edddbcfc2e7fa6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 17:03:53 +0300 Subject: GWAS: sort results so we always get the same list --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index ce7842f7..b901c0e2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -110,13 +110,13 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): # PS = [] # TS = [] count = 0 + job = 0 jobs_running = 0 for snp in G: snp_id = (snp,'SNPID') count += 1 - # print count,snp_id if count % 1000 == 0: - job = count/1000 + job += 1 if verbose: sys.stderr.write("Job %d At SNP %d\n" % (job,count)) if numThreads == 1: @@ -126,7 +126,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): j,lst = q.get() if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - res.append(lst) + res.append((j,lst)) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) jobs_running += 1 @@ -136,7 +136,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): j,lst = q.get_nowait() if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - res.append(lst) + res.append((j,lst)) jobs_running -= 1 except Queue.Empty: time.sleep(0.1) @@ -149,21 +149,25 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): collect.append(snp_id) if numThreads==1 or count<1000 or len(collect)>0: - print "Running on 1 THREAD" - compute_snp(count/1000,n,collect,lmm2,reml,q) + job += 1 + print "Collect final batch size %i job %i @%i: " % (len(collect), job, count) + compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() - res.append(lst) + res.append((j,lst)) print "count=",count," running=",jobs_running," collect=",len(collect) for job in range(jobs_running): j,lst = q.get(True,15) # time out if verbose: sys.stderr.write("Job "+str(j)+" finished\n") - res.append(lst) - - if verbose: - print "res=",res[0][0:10] - print [len(res1) for res1 in res] - ts = [item[0] for res1 in res for item in res1] - ps = [item[1] for res1 in res for item in res1] + res.append((j,lst)) + + print "Before sort",[res1[0] for res1 in res] + res = sorted(res,key=lambda x: x[0]) + # if verbose: + # print "res=",res[0][0:10] + print "After sort",[res1[0] for res1 in res] + print [len(res1[1]) for res1 in res] + ts = [item[0] for j,res1 in res for item in res1] + ps = [item[1] for j,res1 in res for item in res1] return ts,ps -- cgit v1.2.3 From e615b83d34bcc991e671241db71c6ad0c0267479 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 17 Mar 2015 17:15:56 +0300 Subject: Release 0.50-gn2-pre1 --- wqflask/wqflask/my_pylmm/README.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index 0c809610..f6b0e72d 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,9 +1,21 @@ # RELEASE NOTES -## 0.50-gn2-pre1 +## 0.50-gn2-pre1 release + +This is the first test release of multi-core pylmm into GN2. Both +kinship calculation and GWAS have been made multi-threaded by +introducing the Python multiprocessing module. Note that only +run_other has been updated to use the new routines (so human is still +handled the old way). I have taken care that we can still run both +old-style and new-style LMM (through passing the 'new_code' +boolean). This could be an option in the web server for users to +select and test for any unexpected differences (of which there should +be none, naturally ;). + +The current version can handle missing phenotypes, but as they are +removed there is no way for GN2 to know what SNPs the P-values belong +to. A future version will pass a SNP index to allow for missing +phenotypes. + -1. This is the first test release of multi-core pylmm into GN2. Both - Kinship K calculation and GWAS have been made multi-threaded by - introducing the Python multiprocessing module. Note that only - run_other has been updated to use the new routines. \ No newline at end of file -- cgit v1.2.3 From 5fb4245a3d08f7107cf885639fc275a6d73ea82d Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 6 Apr 2015 15:46:46 +0000 Subject: Removed dataSharing directory and its files since they aren't being used, but I should note that a couple of them were changed from GN1; I think that there was some attempt to implement this very early into GN2's development, but we'd probably be better off starting from scratch since a number of things have changed since then (such as the way we call queries) Removed the Singapore link from the mirrors section of the index page since it wasn't working --- wqflask/wqflask/dataSharing/SharingBody.py | 290 --------------------- wqflask/wqflask/dataSharing/SharingInfo.py | 146 ----------- wqflask/wqflask/dataSharing/SharingInfoAddPage.py | 47 ---- .../wqflask/dataSharing/SharingInfoDeletePage.py | 55 ---- wqflask/wqflask/dataSharing/SharingInfoEditPage.py | 51 ---- wqflask/wqflask/dataSharing/SharingInfoPage.py | 64 ----- .../wqflask/dataSharing/SharingInfoUpdatePage.py | 109 -------- .../wqflask/dataSharing/SharingListDataSetPage.py | 99 ------- wqflask/wqflask/dataSharing/SharingPage.py | 40 --- wqflask/wqflask/dataSharing/__init__.py | 0 wqflask/wqflask/templates/index_page.html | 1 - 11 files changed, 902 deletions(-) delete mode 100755 wqflask/wqflask/dataSharing/SharingBody.py delete mode 100755 wqflask/wqflask/dataSharing/SharingInfo.py delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoAddPage.py delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoDeletePage.py delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoEditPage.py delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoPage.py delete mode 100755 wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py delete mode 100755 wqflask/wqflask/dataSharing/SharingListDataSetPage.py delete mode 100755 wqflask/wqflask/dataSharing/SharingPage.py delete mode 100755 wqflask/wqflask/dataSharing/__init__.py diff --git a/wqflask/wqflask/dataSharing/SharingBody.py b/wqflask/wqflask/dataSharing/SharingBody.py deleted file mode 100755 index 880161f5..00000000 --- a/wqflask/wqflask/dataSharing/SharingBody.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -sharing_body_string = """ - -""" - -sharinginfo_body_string = """ -""" - -sharinginfoedit_body_string = """""" diff --git a/wqflask/wqflask/dataSharing/SharingInfo.py b/wqflask/wqflask/dataSharing/SharingInfo.py deleted file mode 100755 index 41a75222..00000000 --- a/wqflask/wqflask/dataSharing/SharingInfo.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from __future__ import print_function, division - -from pprint import pformat as pf -from collections import namedtuple - -import requests - -from dbFunction import webqtlDatabaseFunction -import SharingBody - -#import logging -#logging.basicConfig(filename="/tmp/flask_gn_log", level=logging.INFO) -# -#_log = logging.getLogger("search") -#_ch = logging.StreamHandler() -#_log.addHandler(_ch) - - - -######################################### -# Sharing Info -######################################### -class SharingInfo(object): - - def __init__(self, GN_AccessionId, InfoPageName): - print("In SharingInfo") - self.GN_AccessionId = GN_AccessionId - self.InfoPageName = InfoPageName - - def getInfo(self): - cursor = webqtlDatabaseFunction.getCursor() - if (not cursor): - return - - field_names = """Id, GEO_Series, Status, Title, Organism, Experiment_Type, - Summary, Overall_Design, Contributor, Citation, Submission_Date, - Contact_Name, Emails, Phone, URL, Organization_Name, Department, - Laboratory, Street, City, State, ZIP, Country, Platforms, - Samples, Species, Normalization, InbredSet, InfoPageName, - DB_Name, Organism_Id, InfoPageTitle, GN_AccesionId, Tissue, - AuthorizedUsers, About_Cases, About_Tissue, About_Download, - About_Array_Platform, About_Data_Values_Processing, - Data_Source_Acknowledge, Progreso """ - - # We can use string interpolation here cause we own the string - sql = """select %s from InfoFiles where """ % (field_names) - if self.GN_AccessionId: - sql += "GN_AccesionId = %s" - cursor.execute(sql, self.GN_AccessionId) - elif self.InfoPageName: - sql += "InfoPageName = %s" - cursor.execute(sql, self.InfoPageName) - else: - raise Exception('No correct parameter found') - info = cursor.fetchone() - - info = todict(field_names, info) - - # fetch datasets file list - filelist = [] - if info["GN_AccesionId"]: - url = "http://atlas.uthsc.edu/scandatasets.php?GN_AccesionId=%s" % ( - info["GN_AccesionId"]) - try: - response = requests.get(url) - except Exception as why: - log.exception("Problem conneting to:", url) - if response: - data = response.text - filelist = data.split() - - return info, filelist - - - def getBody(self, infoupdate=""): - info, filelist = self.getInfo() - if filelist: - htmlfilelist = '
    \n' - for i in range(len(filelist)): - if i%2==0: - filename = filelist[i] - filesize = filelist[i+1] - htmlfilelist += "
  • " - htmlfilelist += '%s' % (self.GN_AccessionId, filename, filename) - htmlfilelist += '   ' - #r=re.compile(r'(?<=\d)(?=(\d\d\d)+(?!\d))') - #htmlfilelist += '[%s B]' % r.sub(r',',filesize) - if 12 < len(filesize): - filesize=filesize[0:-12] - filesize += ' T' - elif 9 < len(filesize): - filesize=filesize[0:-9] - filesize += ' G' - elif 6 < len(filesize): - filesize=filesize[0:-6] - filesize += ' M' - elif 3 < len(filesize): - filesize=filesize[0:-3] - filesize += ' K' - htmlfilelist += '[%sB]' % filesize - htmlfilelist += "
  • \n" - htmlfilelist += "
" - else: - htmlfilelist = "Data sets are not available or are not public yet." - - return info, htmlfilelist - #return SharingBody.sharinginfo_body_string % (info[31], info[32], infoupdate, info[32], info[1], info[3], info[30], info[4], info[27], info[33], info[2], info[23], info[26], info[11], info[15], info[16], info[18], info[19], info[20], info[21], info[22], info[13], info[12], info[14], info[14], htmlfilelist, info[6], info[35], info[36], info[37], info[38], info[39], info[40], info[5], info[7], info[8], info[9], info[10], info[17], info[24]) - - -def todict(fields, values): - """Converts sql results into a user friendly dictionary""" - new_dict = {} - fields = fields.split(",") - for counter, field in enumerate(fields): - field = field.strip() - value = values[counter] - if isinstance(value, str): - value = unicode(value, "utf-8") - new_dict[field] = value - return new_dict diff --git a/wqflask/wqflask/dataSharing/SharingInfoAddPage.py b/wqflask/wqflask/dataSharing/SharingInfoAddPage.py deleted file mode 100755 index 452fb474..00000000 --- a/wqflask/wqflask/dataSharing/SharingInfoAddPage.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from base.templatePage import templatePage -from base import webqtlConfig -import SharingBody -import SharingInfo - - -######################################### -# Sharing Info Edit Page -######################################### -class SharingInfoAddPage(templatePage): - - def __init__(self, fd=None): - templatePage.__init__(self, fd) - if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']: - pass - else: - heading = "Adding Info" - detail = ["You don't have the permission to add new dataset"] - self.error(heading=heading,detail=detail,error="Error") - return - self.dict['body'] = SharingBody.sharinginfoedit_body_string % ("Add new dataset", "-1", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "") diff --git a/wqflask/wqflask/dataSharing/SharingInfoDeletePage.py b/wqflask/wqflask/dataSharing/SharingInfoDeletePage.py deleted file mode 100755 index a9c785c6..00000000 --- a/wqflask/wqflask/dataSharing/SharingInfoDeletePage.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from base.templatePage import templatePage -from base import webqtlConfig -from dbFunction import webqtlDatabaseFunction -import SharingBody -import SharingInfo - - -######################################### -# Sharing Info Delete Page -######################################### -class SharingInfoDeletePage(templatePage): - - def __init__(self, fd=None): - templatePage.__init__(self, fd) - if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']: - pass - else: - heading = "Deleting Info" - detail = ["You don't have the permission to delete this dataset"] - self.error(heading=heading,detail=detail,error="Error") - return - cursor = webqtlDatabaseFunction.getCursor() - if (not cursor): - return - GN_AccessionId = fd.formdata.getvalue('GN_AccessionId') - sql = "delete from InfoFiles where GN_AccesionId=%s" - cursor.execute(sql, GN_AccessionId) - re = cursor.fetchone() - self.dict['body'] = "Delete dataset info record (GN_AccesionId=%s) successfully." % GN_AccessionId diff --git a/wqflask/wqflask/dataSharing/SharingInfoEditPage.py b/wqflask/wqflask/dataSharing/SharingInfoEditPage.py deleted file mode 100755 index c5f4ed22..00000000 --- a/wqflask/wqflask/dataSharing/SharingInfoEditPage.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from base.templatePage import templatePage -from base import webqtlConfig -import SharingBody -import SharingInfo - - -######################################### -# Sharing Info Edit Page -######################################### -class SharingInfoEditPage(templatePage): - - def __init__(self, fd=None): - templatePage.__init__(self, fd) - if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']: - pass - else: - heading = "Editing Info" - detail = ["You don't have the permission to edit this dataset"] - self.error(heading=heading,detail=detail,error="Error") - return - GN_AccessionId = fd.formdata.getvalue('GN_AccessionId') - InfoPageName = fd.formdata.getvalue('InfoPageName') - sharingInfoObject = SharingInfo.SharingInfo(GN_AccessionId, InfoPageName) - info, filelist = sharingInfoObject.getInfo() - self.dict['body'] = SharingBody.sharinginfoedit_body_string % (info[31], info[0], info[11], info[12], info[13], info[14], info[15], info[16], info[17], info[18], info[19], info[20], info[21], info[22], info[6], info[5], info[35], info[36], info[37], info[38], info[39], info[7], info[8], info[9], info[40], info[32], info[31], info[1], info[2], info[3], info[30], info[4], info[10], info[23], info[25], info[33], info[26], info[27], info[28], info[24], info[34], info[41]) diff --git a/wqflask/wqflask/dataSharing/SharingInfoPage.py b/wqflask/wqflask/dataSharing/SharingInfoPage.py deleted file mode 100755 index c32eec50..00000000 --- a/wqflask/wqflask/dataSharing/SharingInfoPage.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from __future__ import print_function, division - -from pprint import pformat as pf - -import urlparse - -import flask - -from base import webqtlConfig -from dbFunction import webqtlDatabaseFunction -import SharingBody -import SharingInfo - - -######################################### -# Sharing Info Page -######################################### -class SharingInfoPage(): - - def __init__(self, fd): - self.redirect_url = None # Set if you want a redirect - print("fd is:", pf(fd.__dict__)) - # Todo: Need a [0] in line below????d - GN_AccessionId = fd.get('GN_AccessionId') # Used under search datasharing - InfoPageName = fd.get('database') # Might need to add a [0] - cursor = webqtlDatabaseFunction.getCursor() - if InfoPageName and not GN_AccessionId: - sql = "select GN_AccesionId from InfoFiles where InfoPageName = %s" - cursor.execute(sql, InfoPageName) - GN_AccessionId = cursor.fetchone() - self.redirect_url = urlparse.urljoin(webqtlConfig.ROOT_URL, "/data_sharing?GN_AccessionId=%s" % GN_AccessionId) - #self.redirect_url = flask.url_for('data_sharing', GN_AccessionId=GN_AccessionId[0]) - print("set self.redirect_url") - #print("before redirect") - #return flask.redirect(url) - #print("after redirect") - else: - CauseError diff --git a/wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py b/wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py deleted file mode 100755 index 181f2eed..00000000 --- a/wqflask/wqflask/dataSharing/SharingInfoUpdatePage.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -import MySQLdb - -from base.templatePage import templatePage -from base import webqtlConfig -from dbFunction import webqtlDatabaseFunction -import SharingBody -import SharingInfo - -######################################### -# Sharing Info Update Page -######################################### -class SharingInfoUpdatePage(templatePage): - - def __init__(self, fd=None): - templatePage.__init__(self, fd) - if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']: - pass - else: - heading = "Editing Info" - detail = ["You don't have the permission to modify this file"] - self.error(heading=heading,detail=detail,error="Error") - return - cursor = webqtlDatabaseFunction.getCursor() - if (not cursor): - return - Id=fd.formdata.getvalue('Id') - GN_AccesionId=fd.formdata.getvalue('GN_AccesionId') - GEO_Series=fd.formdata.getvalue('GEO_Series') - Status=fd.formdata.getvalue('Status') - Title=fd.formdata.getvalue('Title') - Organism_Id=fd.formdata.getvalue('Organism_Id') - Organism=fd.formdata.getvalue('Organism') - Experiment_Type =fd.formdata.getvalue('Experiment_Type') - Summary=fd.formdata.getvalue('Summary') - Overall_Design=fd.formdata.getvalue('Overall_Design') - Contributor=fd.formdata.getvalue('Contributor') - Citation=fd.formdata.getvalue('Citation') - Submission_Date=fd.formdata.getvalue('Submission_Date') - Contact_Name=fd.formdata.getvalue('Contact_Name') - Emails=fd.formdata.getvalue('Emails') - Phone=fd.formdata.getvalue('Phone') - URL=fd.formdata.getvalue('URL') - Organization_Name=fd.formdata.getvalue('Organization_Name') - Department=fd.formdata.getvalue('Department') - Laboratory=fd.formdata.getvalue('Laboratory') - Street=fd.formdata.getvalue('Street') - City=fd.formdata.getvalue('City') - State=fd.formdata.getvalue('State') - ZIP=fd.formdata.getvalue('ZIP') - Country=fd.formdata.getvalue('Country') - Platforms=fd.formdata.getvalue('Platforms') - Samples=fd.formdata.getvalue('Samples') - Species=fd.formdata.getvalue('Species') - Tissue=fd.formdata.getvalue('Tissue') - Normalization=fd.formdata.getvalue('Normalization') - InbredSet=fd.formdata.getvalue('InbredSet') - InfoPageName=fd.formdata.getvalue('InfoPageName') - InfoPageTitle=fd.formdata.getvalue('InfoPageTitle') - About_Cases=fd.formdata.getvalue('About_Cases') - About_Tissue=fd.formdata.getvalue('About_Tissue') - About_Download=fd.formdata.getvalue('About_Download') - About_Array_Platform=fd.formdata.getvalue('About_Array_Platform') - About_Data_Values_Processing=fd.formdata.getvalue('About_Data_Values_Processing') - Data_Source_Acknowledge=fd.formdata.getvalue('Data_Source_Acknowledge') - AuthorizedUsers=fd.formdata.getvalue('AuthorizedUsers') - Progress=fd.formdata.getvalue('Progress') - if Id=='-1': - sharingInfoObject = SharingInfo.SharingInfo(GN_AccesionId, InfoPageName) - info, filelist = sharingInfoObject.getInfo() - if info: - heading = "Editing Info" - detail = ["The new dataset info record is duplicate."] - self.error(heading=heading, detail=detail, error="Error") - return - sql = """INSERT INTO InfoFiles SET GN_AccesionId=%s, GEO_Series=%s, Status=%s, Title=%s, Organism_Id=%s, Organism=%s, Experiment_Type=%s, Summary=%s, Overall_Design=%s, Contributor=%s, Citation=%s, Submission_Date=%s, Contact_Name=%s, Emails=%s, Phone=%s, URL=%s, Organization_Name=%s, Department=%s, Laboratory=%s, Street=%s, City=%s, State=%s, ZIP=%s, Country=%s, Platforms=%s, Samples=%s, Species=%s, Tissue=%s, Normalization=%s, InbredSet=%s, InfoPageName=%s, InfoPageTitle=%s, About_Cases=%s, About_Tissue=%s, About_Download=%s, About_Array_Platform=%s, About_Data_Values_Processing=%s, Data_Source_Acknowledge=%s, AuthorizedUsers=%s, Progreso=%s""" - cursor.execute(sql, tuple([GN_AccesionId, GEO_Series, Status, Title, Organism_Id, Organism, Experiment_Type, Summary, Overall_Design, Contributor, Citation, Submission_Date, Contact_Name, Emails, Phone, URL, Organization_Name, Department, Laboratory, Street, City, State, ZIP, Country, Platforms, Samples, Species, Tissue, Normalization, InbredSet, InfoPageName, InfoPageTitle, About_Cases, About_Tissue, About_Download, About_Array_Platform, About_Data_Values_Processing, Data_Source_Acknowledge, AuthorizedUsers, Progress])) - infoupdate="This record has been succesfully added." - else: - sql = """UPDATE InfoFiles SET GN_AccesionId=%s, GEO_Series=%s, Status=%s, Title=%s, Organism_Id=%s, Organism=%s, Experiment_Type=%s, Summary=%s, Overall_Design=%s, Contributor=%s, Citation=%s, Submission_Date=%s, Contact_Name=%s, Emails=%s, Phone=%s, URL=%s, Organization_Name=%s, Department=%s, Laboratory=%s, Street=%s, City=%s, State=%s, ZIP=%s, Country=%s, Platforms=%s, Samples=%s, Species=%s, Tissue=%s, Normalization=%s, InbredSet=%s, InfoPageName=%s, InfoPageTitle=%s, About_Cases=%s, About_Tissue=%s, About_Download=%s, About_Array_Platform=%s, About_Data_Values_Processing=%s, Data_Source_Acknowledge=%s, AuthorizedUsers=%s, Progreso=%s WHERE Id=%s""" - cursor.execute(sql, tuple([GN_AccesionId, GEO_Series, Status, Title, Organism_Id, Organism, Experiment_Type, Summary, Overall_Design, Contributor, Citation, Submission_Date, Contact_Name, Emails, Phone, URL, Organization_Name, Department, Laboratory, Street, City, State, ZIP, Country, Platforms, Samples, Species, Tissue, Normalization, InbredSet, InfoPageName, InfoPageTitle, About_Cases, About_Tissue, About_Download, About_Array_Platform, About_Data_Values_Processing, Data_Source_Acknowledge, AuthorizedUsers, Progress, Id])) - infoupdate="This record has been succesfully updated." - sharingInfoObject = SharingInfo.SharingInfo(GN_AccesionId, InfoPageName) - self.dict['body'] = sharingInfoObject.getBody(infoupdate=infoupdate) diff --git a/wqflask/wqflask/dataSharing/SharingListDataSetPage.py b/wqflask/wqflask/dataSharing/SharingListDataSetPage.py deleted file mode 100755 index 8685ac65..00000000 --- a/wqflask/wqflask/dataSharing/SharingListDataSetPage.py +++ /dev/null @@ -1,99 +0,0 @@ -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from htmlgen import HTMLgen2 as HT -from base import webqtlConfig - -from base.templatePage import templatePage - - -######################################### -# Sharing List DataSet Page -######################################### -class SharingListDataSetPage(templatePage): - - def __init__(self, fd=None): - templatePage.__init__(self, fd) - - if not self.openMysql(): - return - - if webqtlConfig.USERDICT[self.privilege] >= webqtlConfig.USERDICT['admin']: - pass - else: - heading = "Editing Info" - detail = ["You don't have the permission to list the datasets"] - self.error(heading=heading,detail=detail,error="Error") - return - - - TD_LR = HT.TD(height=200,width="100%",bgColor='#eeeeee') - - query = """select GN_AccesionId, InfoPageTitle, Progreso from InfoFiles order by GN_AccesionId""" - self.cursor.execute(query) - result = self.cursor.fetchall() - - heading = HT.Paragraph('Dataset Table', Class="title") - - newrecord = HT.Href(text="New Record", url="/webqtl/main.py?FormID=sharinginfoadd") - - info = "Click the accession id to view the dataset info. Click the dataset name to edit the dataset info." - - datasetTable = HT.TableLite(border=0, cellpadding=0, cellspacing=0, Class="collap", width="100%") - - tableHeaderRow = HT.TR() - tableHeaderRow.append(HT.TD("Accession Id", Class='fs14 fwb ffl b1 cw cbrb', align="center")) - tableHeaderRow.append(HT.TD("Dataset name", Class='fs14 fwb ffl b1 cw cbrb', align="center")) - tableHeaderRow.append(HT.TD("Progress", Class='fs14 fwb ffl b1 cw cbrb', align="center")) - tableHeaderRow.append(HT.TD("Operation", Class='fs14 fwb ffl b1 cw cbrb', align="center")) - datasetTable.append(tableHeaderRow) - - for one_row in result: - Accession_Id, InfoPage_title, Progress = one_row - datasetRow = HT.TR() - datasetRow.append(HT.TD(HT.Href(text="GN%s" % Accession_Id, url="/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=%s" % Accession_Id, Class='fs12 fwn'), Class="fs12 fwn b1 c222")) - datasetRow.append(HT.TD(HT.Href(text="%s" % InfoPage_title, url="/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=%s" % Accession_Id, Class='fs12 fwn'), Class="fs12 fwn b1 c222")) - datasetRow.append(HT.TD("%s" % Progress, Class='fs12 fwn ffl b1 c222')) - operation_edit = HT.Href(text="Edit", url="/webqtl/main.py?FormID=sharinginfoedit&GN_AccessionId=%s" % Accession_Id) - operation_delete = HT.Href(text="Delete", onClick="deleteRecord(%s); return false;" % Accession_Id) - operation = HT.TD(Class="fs12 fwn b1 c222", align="center") - operation.append(operation_edit) - operation.append("    ") - operation.append(operation_delete) - datasetRow.append(operation) - datasetTable.append(datasetRow) - - TD_LR.append(heading, HT.P(), newrecord, HT.P(), info, HT.P(), datasetTable) - - js1 = """ """ - self.dict['js1'] = js1 - self.dict['body'] = str(TD_LR) diff --git a/wqflask/wqflask/dataSharing/SharingPage.py b/wqflask/wqflask/dataSharing/SharingPage.py deleted file mode 100755 index b244a6bd..00000000 --- a/wqflask/wqflask/dataSharing/SharingPage.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -from base.templatePage import templatePage -import SharingBody - -######################################### -# SharingPage -######################################### - -class SharingPage(templatePage): - - def __init__(self, fd): - templatePage.__init__(self, fd) - self.dict['title'] = 'GeneNetwork Data Sharing Zone' - self.dict['body'] = SharingBody.sharing_body_string - self.dict['js2'] = 'onload="javascript:initialDatasetSelection();"' diff --git a/wqflask/wqflask/dataSharing/__init__.py b/wqflask/wqflask/dataSharing/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index ba017bfb..5e0a92e3 100755 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -211,7 +211,6 @@
  • California at UCLA
  • Germany at the HZI
  • Memphis at the U of M
  • -
  • Singapore at the NUS
  • Switzerland at the EPFL
  • -- cgit v1.2.3 From 03d43722b043deff164420e07d6c7d5e8c128cd6 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 6 Apr 2015 15:57:11 +0000 Subject: Removed the old news code before pulling over Lei's changes Removed the SharingPage import from views since it doesn't exist anymore --- wqflask/wqflask/views.py | 41 +++++++++++--------------------- wqflask/wqflask/yaml_data/whats_new.yaml | 26 -------------------- 2 files changed, 14 insertions(+), 53 deletions(-) delete mode 100755 wqflask/wqflask/yaml_data/whats_new.yaml diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 7fec0456..b7864041 100755 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -45,8 +45,6 @@ from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot from utility import temp_data -from wqflask.dataSharing import SharingInfo, SharingInfoPage - from base import webqtlFormData from utility.benchmark import Bench @@ -100,20 +98,20 @@ def tmp_page(img_path): img_base64 = bytesarray ) -@app.route("/data_sharing") -def data_sharing_page(): - print("In data_sharing") - fd = webqtlFormData.webqtlFormData(request.args) - print("1Have fd") - sharingInfoObject = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None) - info, htmlfilelist = sharingInfoObject.getBody(infoupdate="") - print("type(htmlfilelist):", type(htmlfilelist)) - htmlfilelist = htmlfilelist.encode("utf-8") - #template_vars = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None) - print("1 Made it to rendering") - return render_template("data_sharing.html", - info=info, - htmlfilelist=htmlfilelist) +#@app.route("/data_sharing") +#def data_sharing_page(): +# print("In data_sharing") +# fd = webqtlFormData.webqtlFormData(request.args) +# print("1Have fd") +# sharingInfoObject = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None) +# info, htmlfilelist = sharingInfoObject.getBody(infoupdate="") +# print("type(htmlfilelist):", type(htmlfilelist)) +# htmlfilelist = htmlfilelist.encode("utf-8") +# #template_vars = SharingInfo.SharingInfo(request.args['GN_AccessionId'], None) +# print("1 Made it to rendering") +# return render_template("data_sharing.html", +# info=info, +# htmlfilelist=htmlfilelist) @app.route("/search", methods=('GET',)) @@ -161,17 +159,6 @@ def help(): doc = docs.Docs("help") return render_template("docs.html", **doc.__dict__) -@app.route("/news") -def news(): - #variables = whats_new.whats_new() - with open("/home/sam/gene/wqflask/wqflask/yaml_data/whats_new.yaml") as fh: - contents = fh.read() - yamilized = yaml.safe_load(contents) - news_items = yamilized['news'] - for news_item in news_items: - print("\nnews_item is: %s\n" % (news_item)) - return render_template("whats_new.html", news_items=news_items) - @app.route("/references") def references(): doc = docs.Docs("references") diff --git a/wqflask/wqflask/yaml_data/whats_new.yaml b/wqflask/wqflask/yaml_data/whats_new.yaml deleted file mode 100755 index 8f41a8f2..00000000 --- a/wqflask/wqflask/yaml_data/whats_new.yaml +++ /dev/null @@ -1,26 +0,0 @@ -news: -- - date: 2012-1-20 - title: Mouse SNPs from dbSNP have been added to GeneNetwork - details: - 10 million mouse SNPs from dbSNP (build 128) have been added to Variant Browser. - They could be searched by name (e.g. rs31192936) (Implemented by Xiaodong Zhou and Ning Liu). - -- - date: 2012-1-20 - title: Literature correlation has been update to 2011 version - details: - Dr. Ramin Homayouni and Dr. Lijing Xu kindly provide the 2011 version of mouse gene-gene - literature correlation matrix to GeneNetwork. (Implemented by Xiaodong Zhou). - -- - date: 2012-1-16 - title: Expression data set for EPFL/LISP BXD Muscle Affy Mouse Gene 1.0 ST (Dec11) RMA ** has been entered in GeneNetwork - details: - Laboratory of Integrative and Systems Physiology - (LISP). - This data set is not yet freely available for global analysis. - This data set has not yet been used or described in any publication. - Please contact Johan Auwerx or Evan Williams at evan.williams@epfl.ch - regarding use of these data. (Implemented by J Auwerx, E Williams, LA Rose, - RW Williams and A Centeno). \ No newline at end of file -- cgit v1.2.3 From 4ad55a0ebc1fc77da8e9aa6ec565b2b10c373e9b Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 6 Apr 2015 21:30:13 +0000 Subject: Just committing a couple minor bug fixes to get code working after pulling Pjotr's latest changes --- wqflask/wqflask/marker_regression/marker_regression.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 3fb9915b..7708356b 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -701,7 +701,7 @@ class MarkerRegression(object): no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - genotype_matrix = np.array(trimmed_genotype_data).T + genotype_matrix = np.array(genotype_data).T #print("pheno_vector: ", pf(pheno_vector)) #print("genotype_matrix: ", pf(genotype_matrix)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 58ff809b..53689071 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -43,13 +43,14 @@ Redis = Redis() import sys sys.path.append("/home/zas1024/gene/wqflask/") -sys.path.append("/home/danny/GeneNetwork/wqflask/wqflask/my_pylmm/pyLMM/") -print("sys.path2:", sys.path) has_gn2=True from utility.benchmark import Bench from utility import temp_data + +sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/") + from kinship import kinship, kinship_full, kvakve import genotype import phenotype -- cgit v1.2.3
    + {{newsitem.date}} {{newsitem.details|safe}} - -

    Data Set Download

    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - Species: - - - -
    - Group: - - - -
    - Type: - - - -
    - Database: - - - -
    -     -
    - - -
    - -

    GeneNetwork Accession Number

    -
    - - - - - - - - - - - - -
    GN:  E.g. 112
    -     -
    -
    - -
    -List of DataSets
    -

    %s -modify this page -%s -

    - - - - - -
    - - - - - - - - - - - -
    GN Accession: GN%s
    GEO Series: %s
    Title: %s
    Organism: %s
    Group: %s
    Tissue: %s
    Dataset Status: %s
    Platforms: %s
    Normalization: %s
    - See Contact Information
    -
    -
    - - - - - - - -
    Download datasets and supplementary data files
    %s
    -
    -
    -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Summary:
    %s

    About the cases used to generate this set of data:
    %s

    About the tissue used to generate this set of data:
    %s

    About downloading this data set:
    %s

    About the array platform:
    %s

    About data values and data processing:
    %s

    Data source acknowledgment:
    %s

    Experiment Type:
    %s

    Overall Design:
    %s

    Contributor:
    %s

    Citation:
    %s

    Submission Date:
    %s

    Laboratory:
    %s

    Samples:
    %s

    -

    -
    -

    %s

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    Principal Investigator

    Contact Name:
    Emails:
    Phone:
    URL:
    Organization Name:
    Department:
    Laboratory:
    Address:
    City:
    State:
    ZIP:
    Country:

    Summary

    Summary:

    Biology

    Experiment Design:
    About the cases used to
    generate this set of data:
    About the tissue used to
    generate this set of data:

    Technique

    About downloading this data set:
    About the array platform:

    Bioinformatics

    About data values and
    data processing:
    Overall Design:

    Misc

    Contributor:
    Citation:
    Data source acknowledgment:

    Administrator ONLY

    GN Accesion Id:
    DB Title in GN:
    GEO Series:
    Status:
    Title:
    Organism_Id (Taxonomy ID):
    Organism:
    Submission Date:
    Platforms:
    Species:
    Tissue:
    Normalization:
    Inbred Set:
    Info Page Name:
    Samples:
    Authorized Users:
    Progress:
    -