Diffstat (limited to 'wqflask')
25 files changed, 956 insertions, 380 deletions
diff --git a/wqflask/runserver.py b/wqflask/runserver.py index 9d5686a9..5a76d1e2 100755 --- a/wqflask/runserver.py +++ b/wqflask/runserver.py @@ -18,9 +18,10 @@ from wqflask import app #_ch = logging.StreamHandler() #_log.addHandler(_ch) +print app.config + import logging -#from themodule import TheHandlerYouWant -file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure") +file_handler = logging.FileHandler(app.config['LOGFILE']) file_handler.setLevel(logging.DEBUG) app.logger.addHandler(file_handler) @@ -28,7 +29,7 @@ import logging_tree logging_tree.printout() app.run(host='0.0.0.0', - port=5003, + port=app.config['SERVER_PORT'], use_debugger=False, threaded=True, use_reloader=True) diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py index 5eb901f7..5511826a 100755 --- a/wqflask/wqflask/interval_mapping/interval_mapping.py +++ b/wqflask/wqflask/interval_mapping/interval_mapping.py @@ -57,13 +57,8 @@ class IntervalMapping(object): self.set_options(start_vars) self.json_data = {} - - #if self.method == "qtl_reaper": self.json_data['lodnames'] = ['lod.hk'] self.gen_reaper_results(tempdata) - #else: - # self.gen_pylmm_results(tempdata) - #self.gen_qtl_results(tempdata) #Get chromosome lengths for drawing the interval map plot chromosome_mb_lengths = {} @@ -93,13 +88,7 @@ class IntervalMapping(object): def set_options(self, start_vars): """Sets various options (physical/genetic mapping, # permutations, which chromosome""" - #self.plot_scale = start_vars['scale'] - #if self.plotScale == 'physic' and not fd.genotype.Mbmap: - # self.plotScale = 'morgan' - #self.method = start_vars['mapping_method'] self.num_permutations = int(start_vars['num_perm']) - #self.do_bootstrap = start_vars['do_bootstrap'] - #self.selected_chr = start_vars['chromosome'] if start_vars['manhattan_plot'] == "true": self.manhattan_plot = True else: @@ -112,9 +101,6 @@ class IntervalMapping(object): self.control_locus = start_vars['control_locus'] else: self.control_locus = None - #self.weighted_regression = start_vars['weighted'] - #self.lrs_lod = start_vars['lrs_lod'] - def gen_qtl_results(self, tempdata): """Generates qtl results for plotting interval map""" @@ -134,7 +120,7 @@ class IntervalMapping(object): #if self.weighted_regression: # self.lrs_array = self.genotype.permutation(strains = trimmed_samples, # trait = trimmed_values, - # variance = _vars, + # variance = variances, # nperm=self.num_permutations) #else: self.lrs_array = genotype.permutation(strains = trimmed_samples, @@ -175,12 +161,6 @@ class IntervalMapping(object): self.json_data['markernames'] = [] for qtl in reaper_results: reaper_locus = qtl.locus - #if reaper_locus.chr == "20": - # print("changing to X") - # self.json_data['chr'].append("X") - #else: - # self.json_data['chr'].append(reaper_locus.chr) - ##self.json_data['chr'].append(reaper_locus.chr) self.json_data['pos'].append(reaper_locus.Mb) self.json_data['lod.hk'].append(qtl.lrs) self.json_data['markernames'].append(reaper_locus.name) diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py new file mode 100644 index 00000000..b1ab780c --- /dev/null +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -0,0 +1,44 @@ +import os + +from base import webqtlConfig + +def run_gemma(this_dataset, samples, vals): + """Generates p-values for each marker using GEMMA""" + + print("INSIDE GEMMA_MAPPING") + + gen_pheno_txt_file(this_dataset, samples, vals) + + 
os.chdir("{}gemma".format(webqtlConfig.HTMLPATH)) + + gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o output/%s_output' % (this_dataset.group.name, + this_dataset.group.name, + this_dataset.group.name) + print("gemma_command:" + gemma_command) + + os.system(gemma_command) + + included_markers, p_values = parse_gemma_output(this_dataset) + + return included_markers, p_values + +def gen_pheno_txt_file(this_dataset, samples, vals): + """Generates phenotype file for GEMMA""" + + with open("{}gemma/{}.fam".format(webqtlConfig.HTMLPATH, this_dataset.group.name), "w") as outfile: + for i, sample in enumerate(samples): + outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(vals[i]) + "\n") + +def parse_gemma_output(this_dataset): + included_markers = [] + p_values = [] + with open("{}gemma/output/{}_output.assoc.txt".format(webqtlConfig.HTMLPATH, this_dataset.group.name)) as output_file: + for line in output_file: + if line.startswith("chr"): + continue + else: + included_markers.append(line.split("\t")[1]) + p_values.append(float(line.split("\t")[10])) + + print("p_values: ", p_values) + return included_markers, p_values
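For reference, a minimal standalone sketch of the parsing step the new gemma_mapping module performs. The assoc-file layout, the header check, and the column indices simply mirror parse_gemma_output above; the function name and the example path are illustrative and not part of the commit:

# Illustrative sketch only (not part of the diff): parse a GEMMA
# association file the same way parse_gemma_output above does.
def parse_assoc_file(path):
    """Return (marker_names, p_values) from a GEMMA *.assoc.txt file.

    Column 1 holds the marker name and column 10 the p-value, matching
    the indices used in parse_gemma_output above; the header line
    starting with "chr" is skipped.
    """
    markers, p_values = [], []
    with open(path) as assoc:
        for line in assoc:
            if line.startswith("chr"):   # skip the header row
                continue
            fields = line.split("\t")
            markers.append(fields[1])
            p_values.append(float(fields[10]))
    return markers, p_values

# Hypothetical usage with a made-up group name:
# markers, p_values = parse_assoc_file("output/BXD_output.assoc.txt")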
\ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 7708356b..bed316d7 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -25,12 +25,17 @@ from redis import Redis Redis = Redis() from flask import Flask, g +from wqflask import app from base.trait import GeneralTrait from base import data_set from base import species from base import webqtlConfig from utility import webqtlUtil +from wqflask.marker_regression import qtl_reaper_mapping +from wqflask.marker_regression import plink_mapping +from wqflask.marker_regression import gemma_mapping +from wqflask.marker_regression import rqtl_mapping from wqflask.my_pylmm.data import prep_data from wqflask.my_pylmm.pyLMM import lmm from wqflask.my_pylmm.pyLMM import input @@ -40,6 +45,14 @@ from utility import temp_data from utility.benchmark import Bench +import os +if os.environ.get('PYLMM_PATH') is None: + PYLMM_PATH=app.config.get('PYLMM_PATH') + if PYLMM_PATH is None: + PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM' +if not os.path.isfile(PYLMM_PATH+'/lmm.py'): + raise 'PYLMM_PATH unknown or faulty' +PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py' class MarkerRegression(object): @@ -73,7 +86,10 @@ class MarkerRegression(object): self.dataset.group.get_markers() if self.mapping_method == "gemma": - qtl_results = self.run_gemma() + included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals) + self.dataset.group.get_specified_markers(markers = included_markers) + self.dataset.group.markers.add_pvalues(p_values) + qtl_results = self.dataset.group.markers.markers elif self.mapping_method == "rqtl_plink": qtl_results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": @@ -88,9 +104,7 @@ class MarkerRegression(object): if start_vars['pair_scan'] == "true": self.pair_scan = True - print("pair scan:", self.pair_scan) - print("DOING RQTL GENO") qtl_results = self.run_rqtl_geno() print("qtl_results:", qtl_results) elif self.mapping_method == "plink": @@ -126,8 +140,9 @@ class MarkerRegression(object): #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary self.qtl_results = [] - for qtl in self.filtered_markers: - print("lod score is:", qtl['lod_score']) + for index,qtl in enumerate(self.filtered_markers): + if index<40: + print("lod score is:", qtl['lod_score']) if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y": print("changing to X") self.json_data['chr'].append("X") @@ -144,7 +159,7 @@ class MarkerRegression(object): self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length - print("json_data:", self.json_data) + # print("json_data:", self.json_data) self.js_data = dict( @@ -156,11 +171,11 @@ class MarkerRegression(object): chromosomes = chromosome_mb_lengths, qtl_results = self.filtered_markers, ) + def run_gemma(self): """Generates p-values for each marker using GEMMA""" - #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name)) self.gen_pheno_txt_file() os.chdir("/home/zas1024/gene/web/gemma") @@ -272,7 +287,7 @@ class MarkerRegression(object): """) def run_rqtl_geno(self): - print("Calling R/qtl from python") + print("Calling R/qtl") 
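The new PYLMM_PATH block in marker_regression.py resolves the pyLMM location from the environment, then the Flask config, then a home-directory default. As written the hunk appears to leave PYLMM_PATH unassigned when the environment variable is set, and `raise 'PYLMM_PATH unknown or faulty'` raises a string rather than an exception. A minimal sketch of the intended lookup order, illustrative only and not part of the commit (the helper name is made up):

import os

def resolve_pylmm_path(config):
    """Resolve the pyLMM directory: environment variable first, then the
    Flask config, then the historical default under $HOME (sketch only)."""
    path = (os.environ.get('PYLMM_PATH')
            or config.get('PYLMM_PATH')
            or os.path.join(os.environ['HOME'],
                            'gene/wqflask/wqflask/my_pylmm/pyLMM'))
    if not os.path.isfile(os.path.join(path, 'lmm.py')):
        raise Exception('PYLMM_PATH unknown or faulty')
    return path

# PYLMM_COMMAND = 'python ' + resolve_pylmm_path(app.config) + '/lmm.py'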
self.geno_to_rqtl_function() @@ -381,9 +396,23 @@ class MarkerRegression(object): return pheno_as_string def process_pair_scan_results(self, result): - results = [] + pair_scan_results = [] + + result = result[1] + output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)] + print("R/qtl scantwo output:", output) + + for i, line in enumerate(result.iter_row()): + marker = {} + marker['name'] = result.rownames[i] + marker['chr1'] = output[i][0] + marker['Mb'] = output[i][1] + marker['chr2'] = int(output[i][2]) + pair_scan_results.append(marker) + + print("pair_scan_results:", pair_scan_results) - return results + return pair_scan_results def process_rqtl_results(self, result): # TODO: how to make this a one liner and not copy the stuff in a loop qtl_results = [] @@ -655,8 +684,7 @@ class MarkerRegression(object): Redis.set(key, json_params) Redis.expire(key, 60*60) - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, - "other") + command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other") os.system(command) @@ -713,8 +741,8 @@ class MarkerRegression(object): # "refit": False, # "temp_data": tempdata} - print("genotype_matrix:", str(genotype_matrix.tolist())) - print("pheno_vector:", str(pheno_vector.tolist())) + # print("genotype_matrix:", str(genotype_matrix.tolist())) + # print("pheno_vector:", str(pheno_vector.tolist())) params = dict(pheno_vector = pheno_vector.tolist(), genotype_matrix = genotype_matrix.tolist(), @@ -732,7 +760,7 @@ class MarkerRegression(object): Redis.expire(key, 60*60) print("before printing command") - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other") print("command is:", command) print("after printing command") @@ -745,7 +773,7 @@ class MarkerRegression(object): json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) results = json.loads(json_results[1]) p_values = [float(result) for result in results['p_values']] - print("p_values:", p_values) + print("p_values:", p_values[:10]) #p_values = self.trim_results(p_values) t_stats = results['t_stats'] @@ -806,7 +834,7 @@ class MarkerRegression(object): print("Before creating the command") - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + command = PYLMM_COMMAND+' --key {} --species {}'.format(key, "human") print("command is:", command) diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index fa8c1aab..042cb8df 100755 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -194,4 +194,4 @@ def display_collapsible(number): def user_uuid(): """Unique cookie for a user""" user_uuid = request.cookies.get('user_uuid') -
\ No newline at end of file + diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index f6b0e72d..b844c845 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,21 +1,37 @@ -# RELEASE NOTES - -## 0.50-gn2-pre1 release - -This is the first test release of multi-core pylmm into GN2. Both -kinship calculation and GWAS have been made multi-threaded by -introducing the Python multiprocessing module. Note that only -run_other has been updated to use the new routines (so human is still -handled the old way). I have taken care that we can still run both -old-style and new-style LMM (through passing the 'new_code' -boolean). This could be an option in the web server for users to -select and test for any unexpected differences (of which there should -be none, naturally ;). - -The current version can handle missing phenotypes, but as they are -removed there is no way for GN2 to know what SNPs the P-values belong -to. A future version will pass a SNP index to allow for missing -phenotypes. +# Genenetwork2/pylmm RELEASE NOTES + +## 0.51-gn2 (April 19, 2015) + +- Improved GN2 integration +- Less matrix transposes +- Able to run pylmm standalone without Redis again (still requires + the modules) + +## 0.50-gn2 (April 2nd, 2015) + +- Replaced the GN2 genotype normalization + +## 0.50-gn2-pre2 (March 18, 2015) + +- Added abstractions for progress meter and info/debug statements; + Redis perc_complete is now updated through a lambda + +## 0.50-gn2-pre1 (release, March 17, 2015) + +- This is the first test release of multi-core pylmm into GN2. Both + kinship calculation and GWAS have been made multi-threaded by + introducing the Python multiprocessing module. Note that only + run_other has been updated to use the new routines (so human is + still handled the old way). I have taken care that we can still run + both old-style and new-style LMM (through passing the 'new_code' + boolean). This could be an option in the web server for users to + select and test for any unexpected differences (of which there + should be none, naturally ;). + +- The current version can handle missing phenotypes, but as they are + removed there is no way for GN2 to know what SNPs the P-values + belong to. A future version will pass a SNP index to allow for + missing phenotypes.
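The multi-core behaviour the release notes describe is built on the stdlib multiprocessing module: a Pool whose initializer stashes a shared Queue on the worker function, apply_async for each batch of SNPs, and results drained from the queue and re-sorted by job id (see gwas.py and kinship.py further down). A stripped-down sketch of that pattern with made-up work, illustrative only:

# Sketch of the worker/queue pattern used by gwas.py and kinship.py;
# the batching and the actual association math live in those modules.
import multiprocessing as mp

def compute_batch(job_id, batch, q=None):
    # Each worker computes one batch and pushes (job_id, result) on the queue.
    if q is None:
        q = compute_batch.q              # queue stashed by the pool initializer
    q.put((job_id, [x * x for x in batch]))

def init_worker(q):
    compute_batch.q = q                  # same trick as f_init in gwas.py

if __name__ == '__main__':
    q = mp.Queue()
    pool = mp.Pool(None, init_worker, [q])       # None = use all cores
    batches = [range(i, i + 5) for i in range(0, 20, 5)]
    for job_id, batch in enumerate(batches):
        pool.apply_async(compute_batch, (job_id, list(batch)))
    results = sorted(q.get() for _ in batches)   # re-order by job id
    pool.close(); pool.join()
    print(results)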
\ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index c40c3221..f33c4e74 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py @@ -1 +1 @@ -PYLMM_VERSION="0.50-gn2-pre1" +PYLMM_VERSION="0.51-gn2" diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py new file mode 100644 index 00000000..6c6c9f88 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py @@ -0,0 +1,44 @@ +from __future__ import print_function, division, absolute_import + +import collections +import inspect +import time + +class Bench(object): + entries = collections.OrderedDict() + + def __init__(self, name=None): + self.name = name + + def __enter__(self): + if self.name: + print("Starting benchmark: %s" % (self.name)) + else: + print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + self.start_time = time.time() + + def __exit__(self, type, value, traceback): + if self.name: + name = self.name + else: + name = "That" + + time_taken = time.time() - self.start_time + print(" %s took: %f seconds" % (name, (time_taken))) + + if self.name: + Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken + + + @classmethod + def report(cls): + total_time = sum((time_taken for time_taken in cls.entries.itervalues())) + print("\nTiming report\n") + for name, time_taken in cls.entries.iteritems(): + percent = int(round((time_taken/total_time) * 100)) + print("[{}%] {}: {}".format(percent, name, time_taken)) + print() + + def reset(cls): + """Reset the entries""" + cls.entries = collections.OrderedDict() diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py index 3b6b5d70..4312fed0 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py @@ -1,5 +1,5 @@ -# This is a converter for common LMM formats, so as to keep complexity -# outside the main routines. +# This is a converter for common LMM formats, so as to keep file +# reader complexity outside the main routines. # Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) # @@ -31,6 +31,12 @@ python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--phen Convert files for runlmm.py processing. Writes to stdout by default. 
try --help for more information + +Examples: + + python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno + + python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno """ # if len(args) == 0: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py index 315fd824..49f32e3a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py @@ -37,14 +37,15 @@ def normalize(ind_g): Run for every SNP list (for one individual) and return normalized SNP genotype values with missing data filled in """ - g1 = np.copy(ind_g) # avoid side effects - x = True - np.isnan(ind_g) # Matrix of True/False - m = ind_g[x].mean() # Global mean value - s = np.sqrt(ind_g[x].var()) # Global stddev - g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data - if s == 0: - g1 = g1 - m # Subtract the mean + g = np.copy(ind_g) # copy to avoid side effects + missing = np.isnan(g) + values = g[True - missing] + mean = values.mean() # Global mean value + stddev = np.sqrt(values.var()) # Global stddev + g[missing] = mean # Plug-in mean values for missing data + if stddev == 0: + g = g - mean # Subtract the mean else: - g1 = (g1 - m) / s # Normalize the deviation - return g1 + g = (g - mean) / stddev # Normalize the deviation + return g diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py new file mode 100644 index 00000000..821195c8 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -0,0 +1,110 @@ +# Standalone specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) + +from __future__ import absolute_import, print_function, division + +import numpy as np +import sys +import logging + +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') +logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) + +progress_location = None +progress_current = None +progress_prev_perc = None + +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func + +def progress(location, count, total): + global progress_location + global progress_prev_perc + + perc = round(count*100.0/total) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) + logger.info("Progress: %s %d%%" % (location,perc)) + progress_location = location + progress_prev_perc = perc + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... 
",m[-1][-3:],"]") + +def fatal(msg): + logger.critical(msg) + raise Exception(msg) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, + fatal = fatal, + progress = progress, + mprint = mprint + ) + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] + +# ----- Minor test cases: + +if __name__ == '__main__': + # logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index b901c0e2..247a8729 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -20,7 +20,6 @@ import pdb import time import sys -# from utility import temp_data import lmm2 import os @@ -32,16 +31,25 @@ from lmm2 import LMM2 import multiprocessing as mp # Multiprocessing is part of the Python stdlib import Queue +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses +except ImportError: + has_gn2=False + from standalone import uses + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + + def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" def compute_snp(j,n,snp_ids,lmm2,REML,q = None): - # print("COMPUTE SNP",j,snp_ids,"\n") result = [] for snp_id in snp_ids: snp,id = snp_id x = snp.reshape((n,1)) # all the SNPs - # print "X=",x # if refit: # L.fit(X=snp,REML=REML) ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) @@ -51,33 +59,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None): q = compute_snp.q q.put([j,result]) return j - # PS.append(ps) - # TS.append(ts) - # return len(result) - # compute.q.put(result) - # return None def f_init(q): compute_snp.q = q def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): """ - Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows) + GWAS. 
The G matrix should be n inds (cols) x m snps (rows) """ + info("In gwas.gwas") matrix_initialize() cpu_num = mp.cpu_count() numThreads = None # for now use all available threads kfile2 = False reml = restricted_max_likelihood - sys.stderr.write(str(G.shape)+"\n") + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") - # print "***** GWAS: G",G.shape,G - assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) + info("%s SNPs",snps) + assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose) @@ -85,19 +88,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose) if not refit: - if verbose: sys.stderr.write("Computing fit for null model\n") + info("Computing fit for null model") lmm2.fit() # follow GN model in run_other - if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma)) - - # outFile = "test.out" - # out = open(outFile,'w') - out = sys.stderr - - def outputResult(id,beta,betaSD,ts,ps): - out.write(formatResult(id,beta,betaSD,ts,ps)) - def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") - - # printOutHead() + info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma)) + res = [] # Set up the pool @@ -106,26 +100,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): p = mp.Pool(numThreads, f_init, [q]) collect = [] - # Buffers for pvalues and t-stats - # PS = [] - # TS = [] count = 0 job = 0 jobs_running = 0 + jobs_completed = 0 for snp in G: snp_id = (snp,'SNPID') count += 1 if count % 1000 == 0: job += 1 - if verbose: - sys.stderr.write("Job %d At SNP %d\n" % (job,count)) + debug("Job %d At SNP %d" % (job,count)) if numThreads == 1: - print "Running on 1 THREAD" + debug("Running on 1 THREAD") compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) @@ -134,8 +126,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): while jobs_running > cpu_num: try: j,lst = q.get_nowait() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) jobs_running -= 1 except Queue.Empty: @@ -150,24 +143,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): if numThreads==1 or count<1000 or len(collect)>0: job += 1 - print "Collect final batch size %i job %i @%i: " % (len(collect), job, count) + debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count)) compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() res.append((j,lst)) - print "count=",count," running=",jobs_running," collect=",len(collect) + debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect))) for job in range(jobs_running): j,lst = q.get(True,15) # time out - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) - print "Before sort",[res1[0] for res1 in 
res] + mprint("Before sort",[res1[0] for res1 in res]) res = sorted(res,key=lambda x: x[0]) - # if verbose: - # print "res=",res[0][0:10] - print "After sort",[res1[0] for res1 in res] - print [len(res1[1]) for res1 in res] + mprint("After sort",[res1[0] for res1 in res]) + info([len(res1[1]) for res1 in res]) ts = [item[0] for j,res1 in res for item in res1] ps = [item[1] for j,res1 in res for item in res1] return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 0c43587e..1c157fd8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -28,17 +28,30 @@ import time from optmatrix import matrix_initialize, matrixMultT +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func +except ImportError: + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + +progress,debug,info,mprint = uses('progress','debug','info','mprint') + def kinship_full(G): """ Calculate the Kinship matrix using a full dot multiplication """ - print G.shape + # mprint("kinship_full G",G) m = G.shape[0] # snps n = G.shape[1] # inds - sys.stderr.write(str(m)+" SNPs\n") - assert m>n, "n should be larger than m (snps>inds)" - m = np.dot(G.T,G) + info("%d SNPs",m) + assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n) + # m = np.dot(G.T,G) + m = matrixMultT(G.T) m = m/G.shape[0] + # mprint("kinship_full K",m) return m def compute_W(job,G,n,snps,compute_size): @@ -74,46 +87,38 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): - numThreads = None - if numThreads: - numThreads = int(numThreads) +def kinship(G,computeSize=1000,numThreads=None,useBLAS=False): + matrix_initialize(useBLAS) - - sys.stderr.write(str(G.shape)+"\n") + + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") - assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) + info("%i SNPs" % (m)) + assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) cpu_num = mp.cpu_count() - print "CPU cores:",cpu_num - print snps,computeSize + info("CPU cores: %i" % cpu_num) iterations = snps/computeSize+1 - # if testing: - # iterations = 8 - # jobs = range(0,8) # range(0,iterations) results = [] - K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals completed = 0 for job in range(iterations): - if verbose: - sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize))) + info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize))) W = compute_W(job,G,n,snps,computeSize) if numThreads == 1: # Single-core compute_matrixMult(job,W,q) j,x = q.get() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + progress("kinship",j,iterations) K_j = x - # print j,K_j[:,0] K = K + K_j else: # Multi-core @@ -123,52 +128,38 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): time.sleep(0.1) try: j,x = q.get_nowait() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) except Queue.Empty: pass if 
numThreads == None or numThreads > 1: - # results contains the growing result set for job in range(len(results)-completed): j,x = q.get(True,15) - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) K = K / float(snps) - - # outFile = 'runtest.kin' - # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) - # np.savetxt(outFile,K) - - # if saveKvaKve: - # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n") - # Kva,Kve = linalg.eigh(K) - # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile) - # np.savetxt(outFile+".kva",Kva) - # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, verbose=True): +def kvakve(K): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - + info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) - if verbose: - print("Kva is: ", Kva.shape, Kva) - print("Kve is: ", Kve.shape, Kve) + mprint("Kva",Kva) + mprint("Kve",Kve) - if sum(Kva < 1e-6): - if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0))) + if sum(Kva < 0): + info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0))) Kva[Kva < 1e-6] = 1e-6 return Kva,Kve diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 53689071..2a0c7fdc 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,49 +19,59 @@ from __future__ import absolute_import, print_function, division import sys import time -import argparse import uuid import numpy as np from scipy import linalg from scipy import optimize from scipy import stats -import pdb +# import pdb -import simplejson as json - -import gzip -import zlib +# import gzip +# import zlib import datetime -import cPickle as pickle -import simplejson as json - +# import cPickle as pickle from pprint import pformat as pf -from redis import Redis -Redis = Redis() - -import sys -sys.path.append("/home/zas1024/gene/wqflask/") - -has_gn2=True - -from utility.benchmark import Bench -from utility import temp_data - -sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/") +# Add local dir to PYTHONPATH +import os +cwd = os.path.dirname(__file__) +if sys.path[0] != cwd: + sys.path.insert(1,cwd) +# pylmm imports from kinship import kinship, kinship_full, kvakve import genotype import phenotype import gwas +from benchmark import Bench +# The following imports are for exchanging data with the webserver +import simplejson as json +from redis import Redis +Redis = Redis() +import temp_data + +has_gn2=None + +# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n") +sys.stderr.write("INFO: pylmm file is "+__file__+"\n") + +# ---- A trick to decide on the environment: try: - from wqflask.my_pylmm.pyLMM import chunks + sys.stderr.write("INFO: lmm try loading module\n") + import utility.formatting # this is never used, just to check the environment + sys.stderr.write("INFO: This is a genenetwork2 environment\n") + from gn2 import uses, progress_set_func + has_gn2=True except ImportError: - print("WARNING: Standalone version missing the Genenetwork2 environment\n") + # Failed to load gn2 has_gn2=False - pass + import 
standalone as handlers + from standalone import uses, progress_set_func + sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') #np.seterr('raise') @@ -76,8 +86,7 @@ def run_human(pheno_vector, covariate_matrix, plink_input_file, kinship_matrix, - refit=False, - tempdata=None): + refit=False): v = np.isnan(pheno_vector) keep = True - v @@ -169,10 +178,7 @@ def run_human(pheno_vector, #if count > 1000: # break count += 1 - - percent_complete = (float(count) / total_snps) * 100 - #print("percent_complete: ", percent_complete) - tempdata.store("percent_complete", percent_complete) + progress("human",count,total_snps) #with Bench("actual association"): ps, ts = human_association(snp, @@ -260,23 +266,19 @@ def human_association(snp, def run_other_old(pheno_vector, genotype_matrix, restricted_max_likelihood=True, - refit=False, - tempdata=None # <---- can not be None - ): + refit=False): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) """ print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -292,27 +294,22 @@ def run_other_old(pheno_vector, with Bench("Doing GWAS"): t_stats, p_values = GWAS(pheno_vector, - genotype_matrix, + genotype_matrix.T, kinship_matrix, restricted_max_likelihood=True, - refit=False, - temp_data=tempdata) + refit=False) Bench().report() return p_values, t_stats -def run_other_new(pheno_vector, - genotype_matrix, +def run_other_new(n,m,pheno_vector, + geno, restricted_max_likelihood=True, - refit=False, - tempdata=None # <---- can not be None - ): + refit=False): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) """ @@ -320,8 +317,7 @@ def run_other_new(pheno_vector, print("REML=",restricted_max_likelihood," REFIT=",refit) # Adjust phenotypes - Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True) - print("Removed missing phenotypes",Y.shape) + n,Y,keep = phenotype.remove_missing_new(n,pheno_vector) # if options.maf_normalization: # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) @@ -329,8 +325,9 @@ def run_other_new(pheno_vector, # if not options.skip_genotype_normalization: # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) + geno = geno[:,keep] with Bench("Calculate Kinship"): - K,G = calculate_kinship(G, tempdata) + K,G = calculate_kinship_new(geno) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -345,7 
+342,7 @@ def run_other_new(pheno_vector, with Bench("Doing GWAS"): t_stats, p_values = gwas.gwas(Y, - G.T, + G, K, restricted_max_likelihood=True, refit=False,verbose=True) @@ -385,18 +382,30 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) - -def calculate_kinship_new(genotype_matrix, temp_data=None): +def calculate_kinship_new(genotype_matrix): + """ + Call the new kinship calculation where genotype_matrix contains + inds (columns) by snps (rows). + """ + assert type(genotype_matrix) is np.ndarray + info("call genotype.normalize") + G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix) + mprint("G",genotype_matrix) + info("call calculate_kinship_new") + return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME) + +def calculate_kinship_iter(geno): """ Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ - print("call genotype.normalize") + assert type(genotype_matrix) is iter + info("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) - print("call calculate_kinship_new") - return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME) + info("call calculate_kinship_new") + return kinship(G) -def calculate_kinship_old(genotype_matrix, temp_data=None): +def calculate_kinship_old(genotype_matrix): """ genotype_matrix is an n x m matrix encoding SNP minor alleles. @@ -404,13 +413,15 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): normalizes the resulting vectors and returns the RRM matrix. """ - print("call calculate_kinship_old") + info("call calculate_kinship_old") + fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("genotype 2D matrix n (inds) is:", n) - print("genotype 2D matrix m (snps) is:", m) + info("genotype 2D matrix n (inds) is: %d" % (n)) + info("genotype 2D matrix m (snps) is: %d" % (m)) assert m>n, "n should be larger than m (snps>inds)" keep = [] + mprint("G (before old normalize)",genotype_matrix) for counter in range(m): #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) #Checks if any values in column are not numbers @@ -429,17 +440,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): continue keep.append(counter) genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - - percent_complete = int(round((counter/m)*45)) - if temp_data != None: - temp_data.store("percent_complete", percent_complete) + progress('kinship_old normalize genotype',counter,m) genotype_matrix = genotype_matrix[:,keep] - print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) + mprint("G (after old normalize)",genotype_matrix.T) kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) return kinship_matrix,genotype_matrix - -calculate_kinship = calculate_kinship_new # alias + # return kinship_full(genotype_matrix.T),genotype_matrix def GWAS(pheno_vector, genotype_matrix, @@ -465,9 +472,9 @@ def GWAS(pheno_vector, refit - refit the variance component for each SNP """ - if kinship_eigen_vals == None: + if kinship_eigen_vals is None: kinship_eigen_vals = [] - if kinship_eigen_vectors == None: + if kinship_eigen_vectors is None: kinship_eigen_vectors = [] n = genotype_matrix.shape[0] @@ -537,9 +544,8 @@ def GWAS(pheno_vector, 
lmm_ob.fit(X=x) ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood) - percent_complete = 45 + int(round((counter/m)*55)) - temp_data.store("percent_complete", percent_complete) - + progress("gwas_old",counter,m) + p_values.append(ps) t_statistics.append(ts) @@ -572,7 +578,7 @@ class LMM: When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose #x = Y != -9 @@ -665,7 +671,7 @@ class LMM: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] @@ -804,48 +810,96 @@ class LMM: pl.ylabel("Probability of data") pl.title(title) - -def gn2_redis(key,species,new_code=True): +def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True): """ - Invoke pylmm using Redis as a container. new_code runs the new - version + Invoke pylmm using genotype as a matrix or as a (SNP) iterator. """ - json_params = Redis.get(key) - - params = json.loads(json_params) - - tempdata = temp_data.TempData(params['temp_uuid']) - - - print('pheno', np.array(params['pheno_vector'])) + info("run_gwas") + print('pheno', y) if species == "human" : - print('kinship', np.array(params['kinship_matrix'])) - ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), - covariate_matrix = np.array(params['covariate_matrix']), - plink_input_file = params['input_file_name'], - kinship_matrix = np.array(params['kinship_matrix']), - refit = params['refit'], - tempdata = tempdata) + print('kinship', k ) + ps, ts = run_human(pheno_vector = y, + covariate_matrix = cov, + plink_input_file = inputfn, + kinship_matrix = k, + refit = refit) else: - geno = np.array(params['genotype_matrix']) print('geno', geno.shape, geno) if new_code: - ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) + ps, ts = run_other_new(n,m,pheno_vector = y, + geno = geno, + restricted_max_likelihood = reml, + refit = refit) else: - ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']), + ps, ts = run_other_old(pheno_vector = y, genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) + restricted_max_likelihood = reml, + refit = refit) + return ps,ts + +def gwas_with_redis(key,species,new_code=True): + """ + Invoke pylmm using Redis as a container. new_code runs the new + version. All the Redis code goes here! + """ + json_params = Redis.get(key) + + params = json.loads(json_params) + + tempdata = temp_data.TempData(params['temp_uuid']) + def update_tempdata(loc,i,total): + """ + This is the single method that updates Redis for percentage complete! 
+ """ + tempdata.store("percent_complete",round(i*100.0/total)) + debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) + progress_set_func(update_tempdata) + + def narray(t): + info("Type is "+t) + v = params.get(t) + if v is not None: + # Note input values can be array of string or float + v1 = [x if x != 'NA' else 'nan' for x in v] + v = np.array(v1).astype(np.float) + return v + + def marray(t): + info("Type is "+t) + v = params.get(t) + if v is not None: + m = [] + for r in v: + # Note input values can be array of string or float + r1 = [x if x != 'NA' else 'nan' for x in r] + m.append(np.array(r1).astype(np.float)) + return np.array(m) + return np.array(v) + + def marrayT(t): + m = marray(t) + if m is not None: + return m.T + return m + + # We are transposing before we enter run_gwas - this should happen on the webserver + # side (or when reading data from file) + k = marray('kinship_matrix') + g = marrayT('genotype_matrix') + mprint("geno",g) + y = narray('pheno_vector') + n = len(y) + m = params.get('num_genotypes') + if m is None: + m = g.shape[0] + info("m=%d,n=%d" % (m,n)) + ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code) results_key = "pylmm:results:" + params['temp_uuid'] + # fatal(results_key) json_results = json.dumps(dict(p_values = ps, t_stats = ts)) @@ -854,28 +908,56 @@ def gn2_redis(key,species,new_code=True): Redis.expire(results_key, 60*60) return ps, ts -# This is the main function used by Genenetwork2 (with environment) -def gn2_main(): - parser = argparse.ArgumentParser(description='Run pyLMM') - parser.add_argument('-k', '--key') - parser.add_argument('-s', '--species') - - opts = parser.parse_args() - - key = opts.key - species = opts.species +def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): + """ + This function emulates current GN2 behaviour by pre-loading Redis (note the input + genotype is transposed to emulate GN2 (FIXME!) 
+ """ + info("Loading Redis from parsed data") + if kinship == None: + k = None + else: + k = kinship.tolist() + params = dict(pheno_vector = pheno.tolist(), + genotype_matrix = geno.T.tolist(), + num_genotypes = geno.shape[0], + kinship_matrix = k, + covariate_matrix = None, + input_file_name = None, + restricted_max_likelihood = True, + refit = False, + temp_uuid = "testrun_temp_uuid", + + # meta data + timestamp = datetime.datetime.now().isoformat()) + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) - gn2_redis(key,species) + return gwas_with_redis(key,species,new_code) -def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): - print("Loading Redis from parsed data") +def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): + """ + This function emulates GN2 behaviour by pre-loading Redis with + a SNP iterator, for this it sets a key for every genotype (SNP) + """ + print("Loading Redis using a SNP iterator") + for i,genotypes in enumerate(geno_iterator): + gkey = key+'_geno_'+str(i) + Redis.set(gkey, genotypes) + Redis.expire(gkey, 60*60) + if kinship == None: k = None else: k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = geno.tolist(), - kinship_matrix= k, + genotype_matrix = "iterator", + num_genotypes = i, + kinship_matrix = k, + covariate_matrix = None, + input_file_name = None, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", @@ -887,14 +969,27 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): json_params = json.dumps(params) Redis.set(key, json_params) Redis.expire(key, 60*60) + return gwas_with_redis(key,species) - return gn2_redis(key,species,new_code) +# This is the main function used by Genenetwork2 (with environment) +# +# Note that this calling route will become OBSOLETE (we should use runlmm.py +# instead) +def gn2_main(): + import argparse + parser = argparse.ArgumentParser(description='Run pyLMM') + parser.add_argument('-k', '--key') + parser.add_argument('-s', '--species') + opts = parser.parse_args() + + key = opts.key + species = opts.species + + gwas_with_redis(key,species) + + if __name__ == '__main__': print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") - if has_gn2: - gn2_main() - else: - print("Run from runlmm.py instead") - + gn2_main() diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d4b3ac82..d871d8d2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,6 +24,25 @@ from scipy import optimize from optmatrix import matrixMult import kinship +sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n") +sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n") + +# ---- A trick to decide on the environment: +try: + sys.stderr.write("INFO: lmm2 try loading module\n") + import utility.formatting # this is never used, just to check the environment + sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n") + from gn2 import uses, progress_set_func +except ImportError: + # Failed to load gn2 + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + + def calculateKinship(W,center=False): """ W is an n x m matrix encoding SNP minor 
alleles. @@ -75,7 +94,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): print("genotype matrix n is:", n) print("genotype matrix m is:", m) - if X0 == None: + if X0 is None: X0 = np.ones((n,1)) # Remove missing values in Y and adjust associated parameters @@ -139,31 +158,35 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): class LMM2: - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. + """This is a simple version of EMMA/fastLMM. + + The main purpose of this module is to take a phenotype vector (Y), + a set of covariates (X) and a kinship matrix (K) and to optimize + this model by finding the maximum-likelihood estimates for the + model parameters. There are three model parameters: heritability + (h), covariate coefficients (beta) and the total phenotypic + variance (sigma). Heritability as defined here is the proportion + of the total variance (sigma) that is attributed to the kinship + matrix. + + For simplicity, we assume that everything being input is a numpy + array. If this is not the case, the module may throw an error as + conversion from list to numpy array is not done consistently. """ def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): - """ - The constructor takes a phenotype vector or array Y of size n. - It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """The constructor takes a phenotype vector or array Y of size n. It + takes a kinship matrix K of size n x n. Kva and Kve can be + computed as Kva,Kve = linalg.eigh(K) and cached. If they are + not provided, the constructor will calculate them. X0 is an + optional covariate matrix of size n x q, where there are q + covariates. When this parameter is not provided, the + constructor will set X0 to an n x 1 matrix of all ones to + represent a mean effect. """ - if X0 == None: + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose @@ -250,7 +273,7 @@ class LMM2: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: X = self.X0t + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] X = self.X0t_stack @@ -306,7 +329,7 @@ class LMM2: Given this optimum, the function computes the LL and associated ML solutions. 
""" - if X == None: X = self.X0t + if X is None: X = self.X0t else: #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] @@ -330,7 +353,7 @@ class LMM2: def association(self,X,h=None,stack=True,REML=True,returnBeta=False): """ Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. + If h is None, the optimal h stored in optH is used. """ if False: @@ -348,7 +371,7 @@ class LMM2: self.X0t_stack[:,(self.q)] = m X = self.X0t_stack - if h == None: h = self.optH + if h is None: h = self.optH L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) q = len(beta) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index 682ba371..7b652515 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -19,22 +19,47 @@ import sys import numpy as np -def remove_missing(y,g,verbose=False): +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func +except ImportError: + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + +progress,debug,info,mprint = uses('progress','debug','info','mprint') + +def remove_missing(n,y,g): """ Remove missing data from matrices, make sure the genotype data has individuals as rows """ - assert(y!=None) - assert(y.shape[0] == g.shape[0]) + assert(y is not None) + assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape) y1 = y g1 = g v = np.isnan(y) keep = True - v if v.sum(): - if verbose: - sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) + info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) y1 = y[keep] g1 = g[keep,:] - return y1,g1,keep + n = y1.shape[0] + return n,y1,g1,keep + +def remove_missing_new(n,y): + """ + Remove missing data. 
Returns new n,y,keep + """ + assert(y is not None) + y1 = y + v = np.isnan(y) + keep = True - v + if v.sum(): + info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum())) + y1 = y[keep] + n = y1.shape[0] + return n,y1,keep diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 324c4f2c..6b241cd6 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,10 +21,21 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship_old + +# Add local dir to PYTHONPATH +import os +cwd = os.path.dirname(__file__) +if sys.path[0] != cwd: + sys.path.insert(1,cwd) + +# pylmm modules +from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas from kinship import kinship, kinship_full import genotype import phenotype +from standalone import uses + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') usage = """ python runlmm.py [options] command @@ -98,44 +109,64 @@ if options.pheno: y = tsvreader.pheno(options.pheno) print y.shape -if options.geno: +if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape -if cmd == 'redis_new': - # The main difference between redis_new and redis is that missing - # phenotypes are handled by the first - Y = y - G = g - print "Original G",G.shape, "\n", G - - gt = G.T - G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) +def check_results(ps,ts): print np.array(ps) print len(ps),sum(ps) - # Test results p1 = round(ps[0],4) p2 = round(ps[-1],4) - sys.stderr.write(options.geno+"\n") if options.geno == 'data/small.geno': - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 + info("Validating results for "+options.geno) + assert p1==0.7387, "p1=%f" % p1 + assert p2==0.7387, "p2=%f" % p2 if options.geno == 'data/small_na.geno': - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 + info("Validating results for "+options.geno) + assert p1==0.062, "p1=%f" % p1 + assert p2==0.062, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - # assert p1==0.8984, "p1=%f" % p1 - # assert p2==0.9621, "p2=%f" % p2 + info("Validating results for "+options.geno) assert round(sum(ps)) == 4070 assert len(ps) == 8000 + info("Run completed") + +if y is not None: + n = y.shape[0] + +if cmd == 'run': + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + n = len(y) + m = g.shape[1] + ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP + check_results(ps,ts) +elif cmd == 'iterator': + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + geno_iterator = tsvreader.geno_iter(options.geno) + ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator) + check_results(ps,ts) +elif cmd == 'redis_new': + # The main difference between redis_new and redis is that missing + # phenotypes are handled by the first + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + Y = y + G = g + print "Original G",G.shape, "\n", G + # gt = G.T + # G = None + ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True) + check_results(ps,ts) elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G - if y != None and options.remove_missing_phenotypes: + if 
y is not None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) + n,Y,g,keep = phenotype.remove_missing(n,y,gnt) G = g.T print "Removed missing phenotypes",G.shape, "\n", G else: @@ -148,30 +179,16 @@ elif cmd == 'redis': g = None gnt = None - gt = G.T - G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) - print np.array(ps) - print len(ps),sum(ps) - # Test results 4070.02346579 - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - sys.stderr.write(options.geno+"\n") - if options.geno == 'data/small.geno': - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - assert round(sum(ps)) == 4070 - assert len(ps) == 8000 + # gt = G.T + # G = None + ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) + check_results(ps,ts) elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g = phenotype.remove_missing(y,g.T,options.verbose) + n,Y,g,keep = phenotype.remove_missing(n,y,g.T) G = g.T print "Removed missing phenotypes",G.shape, "\n", G if options.maf_normalization: @@ -187,20 +204,20 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None) + K2,G = calculate_kinship_new(np.copy(G)) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T) + K3 = kinship(G) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) if options.geno == 'data/small.geno': - assert k1==0.8, "k1=%f" % k1 - assert k2==0.7939, "k2=%f" % k2 - assert k3==0.7939, "k3=%f" % k3 + assert k1==0.8333, "k1=%f" % k1 + assert k2==0.9375, "k2=%f" % k2 + assert k3==0.9375, "k3=%f" % k3 if options.geno == 'data/small_na.geno': assert k1==0.8333, "k1=%f" % k1 assert k2==0.7172, "k2=%f" % k2 @@ -209,4 +226,4 @@ elif cmd == 'kinship': assert k3==1.4352, "k3=%f" % k3 else: - print "Doing nothing" + fatal("Doing nothing") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py new file mode 100644 index 00000000..40b2021d --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -0,0 +1,110 @@ +# Standalone specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) + +from __future__ import absolute_import, print_function, division + +import numpy as np +import sys +import logging + +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') +logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) + +progress_location = None +progress_current = None +progress_prev_perc = None + +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func + +def progress(location, count, total): + global progress_location + global progress_prev_perc + + perc = round(count*100.0/total) + if perc != progress_prev_perc and (location 
!= progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) + logger.info("Progress: %s %d%%" % (location,perc)) + progress_location = location + progress_prev_perc = perc + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... ",m[-1][-3:],"]") + +def fatal(msg): + logger.critical(msg) + raise Exception(msg) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, + fatal = fatal, + progress = progress, + mprint = mprint + ) + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] + +# ----- Minor test cases: + +if __name__ == '__main__': + # logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py new file mode 100644 index 00000000..004d45c6 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py @@ -0,0 +1,25 @@ +from __future__ import print_function, division, absolute_import +from redis import Redis + +import simplejson as json + +class TempData(object): + + def __init__(self, temp_uuid): + self.temp_uuid = temp_uuid + self.redis = Redis() + self.key = "tempdata:{}".format(self.temp_uuid) + + def store(self, field, value): + self.redis.hset(self.key, field, value) + self.redis.expire(self.key, 60*15) # Expire in 15 minutes + + def get_all(self): + return self.redis.hgetall(self.key) + + +if __name__ == "__main__": + redis = Redis() + for key in redis.keys(): + for field in redis.hkeys(key): + print("{}.{}={}".format(key, field, redis.hget(key, field))) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index b4027fa3..66b34ee2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -56,7 +56,8 @@ def geno(fn): print fn with open(fn,'r') as tsvin: - assert(tsvin.readline().strip() == "# Genotype format version 1.0") + line = tsvin.readline().strip() + assert line == "# Genotype format version 1.0", line tsvin.readline() tsvin.readline() tsvin.readline() @@ -74,3 +75,48 @@ def geno(fn): G = 
np.array(G1) return G +def geno(fn): + G1 = [] + for id,values in geno_iter(fn): + G1.append(values) # <--- slow + G = np.array(G1) + return G + +def geno_callback(fn,func): + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + func(id,gs2) + +def geno_iter(fn): + """ + Yield a tuple of snpid and values + """ + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + yield (id,gs2) diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee index a87d3537..881ea74d 100755 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee @@ -79,8 +79,8 @@ open_mapping_results = (data) -> $.colorbox( html: data href: "#mapping_results_holder" - height: "80%" - width: "80%" + height: "90%" + width: "90%" ) showalert = (message,alerttype) -> @@ -154,8 +154,8 @@ $("#plink_compute").click(() => $("#static_progress_bar_container").modal() url = "/marker_regression" $('input[name=method]').val("plink") - $('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val()) - $('input[name=suggestive]').val($('input[name=suggestive_plink]').val()) + #$('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val()) + #$('input[name=suggestive]').val($('input[name=suggestive_plink]').val()) $('input[name=maf]').val($('input[name=maf_plink]').val()) form_data = $('#trait_data_form').serialize() console.log("form_data is:", form_data) @@ -164,11 +164,12 @@ $("#plink_compute").click(() => ) $("#gemma_compute").click(() => + console.log("RUNNING GEMMA") $("#static_progress_bar_container").modal() url = "/marker_regression" $('input[name=method]').val("gemma") - $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val()) - $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val()) + #$('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val()) + #$('input[name=suggestive]').val($('input[name=suggestive_gemma]').val()) $('input[name=maf]').val($('input[name=maf_gemma]').val()) form_data = $('#trait_data_form').serialize() console.log("form_data is:", form_data) diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index c8988cdc..1779df4b 100755 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -99,8 +99,8 @@ open_mapping_results = function(data) { return $.colorbox({ html: data, href: "#mapping_results_holder", - height: "80%", - width: "80%" + height: "90%", + width: "90%" }); }; @@ -187,11 +187,10 @@ 
$("#plink_compute").click((function(_this) { $("#gemma_compute").click((function(_this) { return function() { var form_data, url; + console.log("RUNNING GEMMA"); $("#static_progress_bar_container").modal(); url = "/marker_regression"; $('input[name=method]').val("gemma"); - $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val()); - $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val()); $('input[name=maf]').val($('input[name=maf_gemma]').val()); form_data = $('#trait_data_form').serialize(); console.log("form_data is:", form_data); diff --git a/wqflask/wqflask/templates/pair_scan_results.html b/wqflask/wqflask/templates/pair_scan_results.html index 869dabed..f46d7cbf 100644 --- a/wqflask/wqflask/templates/pair_scan_results.html +++ b/wqflask/wqflask/templates/pair_scan_results.html @@ -33,6 +33,28 @@ <h2> Results </h2> + <table cellpadding="0" cellspacing="0" border="0" id="pair_scan_results" class="table table-hover table-striped table-bordered"> + <thead> + <tr> + <td>Index</td> + <td>Locus</td> + <td>Chr 1</td> + <td>Mb</td> + <td>Chr 2</td> + </tr> + </thead> + <tbody> + {% for marker in pair_scan_results %} + <tr> + <td>{{loop.index}}</td> + <td>{{marker.name}}</td> + <td>{{marker.chr1}}</td> + <td>{{marker.Mb}}</td> + <td>{{marker.chr2}}</td> + </tr> + {% endfor %} + </tbody> + </table> </div> </div> diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html index 731f6fbd..c7c2a62f 100755 --- a/wqflask/wqflask/templates/search_result_page.html +++ b/wqflask/wqflask/templates/search_result_page.html @@ -42,7 +42,7 @@ <button class="btn btn-default" id="deselect_all"><span class="glyphicon glyphicon-remove"></span> Deselect All</button> <button class="btn btn-default" id="invert"><span class="glyphicon glyphicon-resize-vertical"></span> Invert</button> <button class="btn btn-default" id="add"><span class="glyphicon glyphicon-plus-sign"></span> Add</button> - <button class="btn btn-primary pull-right"><span class="glyphicon glyphicon-download"></span> Download Table</button> + <button class="btn btn-primary"><span class="glyphicon glyphicon-download"></span> Download Table</button> <br /> <br /> <table class="table table-hover table-striped" id='trait_table'> diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index 426a5c5d..27504e51 100755 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -217,16 +217,16 @@ <div style="padding: 20px" class="form-horizontal"> <div class="mapping_method_fields form-group"> <label for="maf_plink" class="col-xs-2 control-label">Minor allele threshold</label> - <div style="margin-left: 20px;" class="col-xs-1 controls"> + <div style="margin-left: 20px;" class="col-xs-2 controls"> <input name="maf_plink" value="0.01" type="text" class="form-control"> </div> </div> </div> <div class="form-group"> - <label for="gemma_compute" class="col-xs-1 control-label"></label> + <label for="plink_compute" class="col-xs-1 control-label"></label> <div style="margin-left:20px;" class="col-xs-4 controls"> - <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression"> + <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression"> Compute </button> </div> @@ -237,16 +237,16 @@ <div 
style="padding: 20px" class="form-horizontal"> <div class="mapping_method_fields form-group"> <label for="maf_gemma" class="col-xs-2 control-label">Minor allele threshold</label> - <div style="margin-left: 20px;" class="col-xs-1 controls"> + <div style="margin-left: 20px;" class="col-xs-2 controls"> <input name="maf_gemma" value="0.01" type="text" class="form-control"> </div> </div> </div> <div class="form-group"> - <label for="plink_compute" class="col-xs-1 control-label"></label> + <label for="gemma_compute" class="col-xs-1 control-label"></label> <div style="margin-left:20px;" class="col-xs-4 controls"> - <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression"> + <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression"> Compute </button> </div> |