aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorzsloan2015-05-01 22:30:00 +0000
committerzsloan2015-05-01 22:30:00 +0000
commit72fb1ac4ade5b812ccafcc00076ae19dcad39b9e (patch)
treead82086bc4f3ec6284e193436265e8bb4df3473a /wqflask
parent64526b27922053d608147b2ce896b42ee9e5c2dc (diff)
downloadgenenetwork2-72fb1ac4ade5b812ccafcc00076ae19dcad39b9e.tar.gz
Added Pjotr's latest pylmm changes
Added GEMMA and necessary plink-format genotype files in gemma directory Added the GEMMA genotype files to git LFS Removed some unnecessary/duplicated code from interval_mapping.py Separated GEMMA mapping code into its own file (will also do this for other methods) and fixed and issue that caused it to not run Added the table to the pair scan results page and wrote some code adding its attributes to each marker object, but it still isn't working yet
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/runserver.py7
-rwxr-xr-xwqflask/wqflask/interval_mapping/interval_mapping.py22
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py44
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py62
-rwxr-xr-xwqflask/wqflask/model.py2
-rw-r--r--wqflask/wqflask/my_pylmm/README.md52
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/__init__.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/benchmark.py44
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py10
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/genotype.py19
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gn2.py110
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gwas.py80
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/kinship.py83
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py341
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm2.py71
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/phenotype.py37
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py111
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/standalone.py110
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/temp_data.py25
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py48
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee13
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js7
-rw-r--r--wqflask/wqflask/templates/pair_scan_results.html22
-rwxr-xr-xwqflask/wqflask/templates/search_result_page.html2
-rwxr-xr-xwqflask/wqflask/templates/show_trait_mapping_tools.html12
25 files changed, 956 insertions, 380 deletions
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 9d5686a9..5a76d1e2 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -18,9 +18,10 @@ from wqflask import app
#_ch = logging.StreamHandler()
#_log.addHandler(_ch)
+print app.config
+
import logging
-#from themodule import TheHandlerYouWant
-file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure")
+file_handler = logging.FileHandler(app.config['LOGFILE'])
file_handler.setLevel(logging.DEBUG)
app.logger.addHandler(file_handler)
@@ -28,7 +29,7 @@ import logging_tree
logging_tree.printout()
app.run(host='0.0.0.0',
- port=5003,
+ port=app.config['SERVER_PORT'],
use_debugger=False,
threaded=True,
use_reloader=True)
diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py
index 5eb901f7..5511826a 100755
--- a/wqflask/wqflask/interval_mapping/interval_mapping.py
+++ b/wqflask/wqflask/interval_mapping/interval_mapping.py
@@ -57,13 +57,8 @@ class IntervalMapping(object):
self.set_options(start_vars)
self.json_data = {}
-
- #if self.method == "qtl_reaper":
self.json_data['lodnames'] = ['lod.hk']
self.gen_reaper_results(tempdata)
- #else:
- # self.gen_pylmm_results(tempdata)
- #self.gen_qtl_results(tempdata)
#Get chromosome lengths for drawing the interval map plot
chromosome_mb_lengths = {}
@@ -93,13 +88,7 @@ class IntervalMapping(object):
def set_options(self, start_vars):
"""Sets various options (physical/genetic mapping, # permutations, which chromosome"""
- #self.plot_scale = start_vars['scale']
- #if self.plotScale == 'physic' and not fd.genotype.Mbmap:
- # self.plotScale = 'morgan'
- #self.method = start_vars['mapping_method']
self.num_permutations = int(start_vars['num_perm'])
- #self.do_bootstrap = start_vars['do_bootstrap']
- #self.selected_chr = start_vars['chromosome']
if start_vars['manhattan_plot'] == "true":
self.manhattan_plot = True
else:
@@ -112,9 +101,6 @@ class IntervalMapping(object):
self.control_locus = start_vars['control_locus']
else:
self.control_locus = None
- #self.weighted_regression = start_vars['weighted']
- #self.lrs_lod = start_vars['lrs_lod']
-
def gen_qtl_results(self, tempdata):
"""Generates qtl results for plotting interval map"""
@@ -134,7 +120,7 @@ class IntervalMapping(object):
#if self.weighted_regression:
# self.lrs_array = self.genotype.permutation(strains = trimmed_samples,
# trait = trimmed_values,
- # variance = _vars,
+ # variance = variances,
# nperm=self.num_permutations)
#else:
self.lrs_array = genotype.permutation(strains = trimmed_samples,
@@ -175,12 +161,6 @@ class IntervalMapping(object):
self.json_data['markernames'] = []
for qtl in reaper_results:
reaper_locus = qtl.locus
- #if reaper_locus.chr == "20":
- # print("changing to X")
- # self.json_data['chr'].append("X")
- #else:
- # self.json_data['chr'].append(reaper_locus.chr)
- ##self.json_data['chr'].append(reaper_locus.chr)
self.json_data['pos'].append(reaper_locus.Mb)
self.json_data['lod.hk'].append(qtl.lrs)
self.json_data['markernames'].append(reaper_locus.name)
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
new file mode 100644
index 00000000..b1ab780c
--- /dev/null
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -0,0 +1,44 @@
+import os
+
+from base import webqtlConfig
+
+def run_gemma(this_dataset, samples, vals):
+ """Generates p-values for each marker using GEMMA"""
+
+ print("INSIDE GEMMA_MAPPING")
+
+ gen_pheno_txt_file(this_dataset, samples, vals)
+
+ os.chdir("{}gemma".format(webqtlConfig.HTMLPATH))
+
+ gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o output/%s_output' % (this_dataset.group.name,
+ this_dataset.group.name,
+ this_dataset.group.name)
+ print("gemma_command:" + gemma_command)
+
+ os.system(gemma_command)
+
+ included_markers, p_values = parse_gemma_output(this_dataset)
+
+ return included_markers, p_values
+
+def gen_pheno_txt_file(this_dataset, samples, vals):
+ """Generates phenotype file for GEMMA"""
+
+ with open("{}gemma/{}.fam".format(webqtlConfig.HTMLPATH, this_dataset.group.name), "w") as outfile:
+ for i, sample in enumerate(samples):
+ outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(vals[i]) + "\n")
+
+def parse_gemma_output(this_dataset):
+ included_markers = []
+ p_values = []
+ with open("{}gemma/output/{}_output.assoc.txt".format(webqtlConfig.HTMLPATH, this_dataset.group.name)) as output_file:
+ for line in output_file:
+ if line.startswith("chr"):
+ continue
+ else:
+ included_markers.append(line.split("\t")[1])
+ p_values.append(float(line.split("\t")[10]))
+
+ print("p_values: ", p_values)
+ return included_markers, p_values \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7708356b..bed316d7 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -25,12 +25,17 @@ from redis import Redis
Redis = Redis()
from flask import Flask, g
+from wqflask import app
from base.trait import GeneralTrait
from base import data_set
from base import species
from base import webqtlConfig
from utility import webqtlUtil
+from wqflask.marker_regression import qtl_reaper_mapping
+from wqflask.marker_regression import plink_mapping
+from wqflask.marker_regression import gemma_mapping
+from wqflask.marker_regression import rqtl_mapping
from wqflask.my_pylmm.data import prep_data
from wqflask.my_pylmm.pyLMM import lmm
from wqflask.my_pylmm.pyLMM import input
@@ -40,6 +45,14 @@ from utility import temp_data
from utility.benchmark import Bench
+import os
+if os.environ.get('PYLMM_PATH') is None:
+ PYLMM_PATH=app.config.get('PYLMM_PATH')
+ if PYLMM_PATH is None:
+ PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM'
+if not os.path.isfile(PYLMM_PATH+'/lmm.py'):
+ raise 'PYLMM_PATH unknown or faulty'
+PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py'
class MarkerRegression(object):
@@ -73,7 +86,10 @@ class MarkerRegression(object):
self.dataset.group.get_markers()
if self.mapping_method == "gemma":
- qtl_results = self.run_gemma()
+ included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
+ self.dataset.group.get_specified_markers(markers = included_markers)
+ self.dataset.group.markers.add_pvalues(p_values)
+ qtl_results = self.dataset.group.markers.markers
elif self.mapping_method == "rqtl_plink":
qtl_results = self.run_rqtl_plink()
elif self.mapping_method == "rqtl_geno":
@@ -88,9 +104,7 @@ class MarkerRegression(object):
if start_vars['pair_scan'] == "true":
self.pair_scan = True
- print("pair scan:", self.pair_scan)
- print("DOING RQTL GENO")
qtl_results = self.run_rqtl_geno()
print("qtl_results:", qtl_results)
elif self.mapping_method == "plink":
@@ -126,8 +140,9 @@ class MarkerRegression(object):
#Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
self.qtl_results = []
- for qtl in self.filtered_markers:
- print("lod score is:", qtl['lod_score'])
+ for index,qtl in enumerate(self.filtered_markers):
+ if index<40:
+ print("lod score is:", qtl['lod_score'])
if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
print("changing to X")
self.json_data['chr'].append("X")
@@ -144,7 +159,7 @@ class MarkerRegression(object):
self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
- print("json_data:", self.json_data)
+ # print("json_data:", self.json_data)
self.js_data = dict(
@@ -156,11 +171,11 @@ class MarkerRegression(object):
chromosomes = chromosome_mb_lengths,
qtl_results = self.filtered_markers,
)
+
def run_gemma(self):
"""Generates p-values for each marker using GEMMA"""
- #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name))
self.gen_pheno_txt_file()
os.chdir("/home/zas1024/gene/web/gemma")
@@ -272,7 +287,7 @@ class MarkerRegression(object):
""")
def run_rqtl_geno(self):
- print("Calling R/qtl from python")
+ print("Calling R/qtl")
self.geno_to_rqtl_function()
@@ -381,9 +396,23 @@ class MarkerRegression(object):
return pheno_as_string
def process_pair_scan_results(self, result):
- results = []
+ pair_scan_results = []
+
+ result = result[1]
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ print("R/qtl scantwo output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr1'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['chr2'] = int(output[i][2])
+ pair_scan_results.append(marker)
+
+ print("pair_scan_results:", pair_scan_results)
- return results
+ return pair_scan_results
def process_rqtl_results(self, result): # TODO: how to make this a one liner and not copy the stuff in a loop
qtl_results = []
@@ -655,8 +684,7 @@ class MarkerRegression(object):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- "other")
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other")
os.system(command)
@@ -713,8 +741,8 @@ class MarkerRegression(object):
# "refit": False,
# "temp_data": tempdata}
- print("genotype_matrix:", str(genotype_matrix.tolist()))
- print("pheno_vector:", str(pheno_vector.tolist()))
+ # print("genotype_matrix:", str(genotype_matrix.tolist()))
+ # print("pheno_vector:", str(pheno_vector.tolist()))
params = dict(pheno_vector = pheno_vector.tolist(),
genotype_matrix = genotype_matrix.tolist(),
@@ -732,7 +760,7 @@ class MarkerRegression(object):
Redis.expire(key, 60*60)
print("before printing command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND + ' --key {} --species {}'.format(key,
"other")
print("command is:", command)
print("after printing command")
@@ -745,7 +773,7 @@ class MarkerRegression(object):
json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
results = json.loads(json_results[1])
p_values = [float(result) for result in results['p_values']]
- print("p_values:", p_values)
+ print("p_values:", p_values[:10])
#p_values = self.trim_results(p_values)
t_stats = results['t_stats']
@@ -806,7 +834,7 @@ class MarkerRegression(object):
print("Before creating the command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,
"human")
print("command is:", command)
diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py
index fa8c1aab..042cb8df 100755
--- a/wqflask/wqflask/model.py
+++ b/wqflask/wqflask/model.py
@@ -194,4 +194,4 @@ def display_collapsible(number):
def user_uuid():
"""Unique cookie for a user"""
user_uuid = request.cookies.get('user_uuid')
- \ No newline at end of file
+
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index f6b0e72d..b844c845 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,21 +1,37 @@
-# RELEASE NOTES
-
-## 0.50-gn2-pre1 release
-
-This is the first test release of multi-core pylmm into GN2. Both
-kinship calculation and GWAS have been made multi-threaded by
-introducing the Python multiprocessing module. Note that only
-run_other has been updated to use the new routines (so human is still
-handled the old way). I have taken care that we can still run both
-old-style and new-style LMM (through passing the 'new_code'
-boolean). This could be an option in the web server for users to
-select and test for any unexpected differences (of which there should
-be none, naturally ;).
-
-The current version can handle missing phenotypes, but as they are
-removed there is no way for GN2 to know what SNPs the P-values belong
-to. A future version will pass a SNP index to allow for missing
-phenotypes.
+# Genenetwork2/pylmm RELEASE NOTES
+
+## 0.51-gn2 (April 19, 2015)
+
+- Improved GN2 integration
+- Less matrix transposes
+- Able to run pylmm standalone without Redis again (still requires
+ the modules)
+
+## 0.50-gn2 (April 2nd, 2015)
+
+- Replaced the GN2 genotype normalization
+
+## 0.50-gn2-pre2 (March 18, 2015)
+
+- Added abstractions for progress meter and info/debug statements;
+ Redis perc_complete is now updated through a lambda
+
+## 0.50-gn2-pre1 (release, March 17, 2015)
+
+- This is the first test release of multi-core pylmm into GN2. Both
+ kinship calculation and GWAS have been made multi-threaded by
+ introducing the Python multiprocessing module. Note that only
+ run_other has been updated to use the new routines (so human is
+ still handled the old way). I have taken care that we can still run
+ both old-style and new-style LMM (through passing the 'new_code'
+ boolean). This could be an option in the web server for users to
+ select and test for any unexpected differences (of which there
+ should be none, naturally ;).
+
+- The current version can handle missing phenotypes, but as they are
+ removed there is no way for GN2 to know what SNPs the P-values
+ belong to. A future version will pass a SNP index to allow for
+ missing phenotypes.
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index c40c3221..f33c4e74 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -1 +1 @@
-PYLMM_VERSION="0.50-gn2-pre1"
+PYLMM_VERSION="0.51-gn2"
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
new file mode 100644
index 00000000..6c6c9f88
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
@@ -0,0 +1,44 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+class Bench(object):
+ entries = collections.OrderedDict()
+
+ def __init__(self, name=None):
+ self.name = name
+
+ def __enter__(self):
+ if self.name:
+ print("Starting benchmark: %s" % (self.name))
+ else:
+ print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+ self.start_time = time.time()
+
+ def __exit__(self, type, value, traceback):
+ if self.name:
+ name = self.name
+ else:
+ name = "That"
+
+ time_taken = time.time() - self.start_time
+ print(" %s took: %f seconds" % (name, (time_taken)))
+
+ if self.name:
+ Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken
+
+
+ @classmethod
+ def report(cls):
+ total_time = sum((time_taken for time_taken in cls.entries.itervalues()))
+ print("\nTiming report\n")
+ for name, time_taken in cls.entries.iteritems():
+ percent = int(round((time_taken/total_time) * 100))
+ print("[{}%] {}: {}".format(percent, name, time_taken))
+ print()
+
+ def reset(cls):
+ """Reset the entries"""
+ cls.entries = collections.OrderedDict()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 3b6b5d70..4312fed0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -1,5 +1,5 @@
-# This is a converter for common LMM formats, so as to keep complexity
-# outside the main routines.
+# This is a converter for common LMM formats, so as to keep file
+# reader complexity outside the main routines.
# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
#
@@ -31,6 +31,12 @@ python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--phen
Convert files for runlmm.py processing. Writes to stdout by default.
try --help for more information
+
+Examples:
+
+ python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno
+
+ python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno
"""
# if len(args) == 0:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
index 315fd824..49f32e3a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -37,14 +37,15 @@ def normalize(ind_g):
Run for every SNP list (for one individual) and return
normalized SNP genotype values with missing data filled in
"""
- g1 = np.copy(ind_g) # avoid side effects
- x = True - np.isnan(ind_g) # Matrix of True/False
- m = ind_g[x].mean() # Global mean value
- s = np.sqrt(ind_g[x].var()) # Global stddev
- g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data
- if s == 0:
- g1 = g1 - m # Subtract the mean
+ g = np.copy(ind_g) # copy to avoid side effects
+ missing = np.isnan(g)
+ values = g[True - missing]
+ mean = values.mean() # Global mean value
+ stddev = np.sqrt(values.var()) # Global stddev
+ g[missing] = mean # Plug-in mean values for missing data
+ if stddev == 0:
+ g = g - mean # Subtract the mean
else:
- g1 = (g1 - m) / s # Normalize the deviation
- return g1
+ g = (g - mean) / stddev # Normalize the deviation
+ return g
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
new file mode 100644
index 00000000..821195c8
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -0,0 +1,110 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None
+progress_current = None
+progress_prev_perc = None
+
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
+
+def progress(location, count, total):
+ global progress_location
+ global progress_prev_perc
+
+ perc = round(count*100.0/total)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ progress_location = location
+ progress_prev_perc = perc
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def fatal(msg):
+ logger.critical(msg)
+ raise Exception(msg)
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
+ fatal = fatal,
+ progress = progress,
+ mprint = mprint
+ )
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index b901c0e2..247a8729 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -20,7 +20,6 @@
import pdb
import time
import sys
-# from utility import temp_data
import lmm2
import os
@@ -32,16 +31,25 @@ from lmm2 import LMM2
import multiprocessing as mp # Multiprocessing is part of the Python stdlib
import Queue
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses
+except ImportError:
+ has_gn2=False
+ from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
def formatResult(id,beta,betaSD,ts,ps):
return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
- # print("COMPUTE SNP",j,snp_ids,"\n")
result = []
for snp_id in snp_ids:
snp,id = snp_id
x = snp.reshape((n,1)) # all the SNPs
- # print "X=",x
# if refit:
# L.fit(X=snp,REML=REML)
ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
@@ -51,33 +59,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
q = compute_snp.q
q.put([j,result])
return j
- # PS.append(ps)
- # TS.append(ts)
- # return len(result)
- # compute.q.put(result)
- # return None
def f_init(q):
compute_snp.q = q
def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
"""
- Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows)
+ GWAS. The G matrix should be n inds (cols) x m snps (rows)
"""
+ info("In gwas.gwas")
matrix_initialize()
cpu_num = mp.cpu_count()
numThreads = None # for now use all available threads
kfile2 = False
reml = restricted_max_likelihood
- sys.stderr.write(str(G.shape)+"\n")
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
- # print "***** GWAS: G",G.shape,G
- assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
+ info("%s SNPs",snps)
+ assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
# CREATE LMM object for association
# if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
@@ -85,19 +88,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
if not refit:
- if verbose: sys.stderr.write("Computing fit for null model\n")
+ info("Computing fit for null model")
lmm2.fit() # follow GN model in run_other
- if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma))
-
- # outFile = "test.out"
- # out = open(outFile,'w')
- out = sys.stderr
-
- def outputResult(id,beta,betaSD,ts,ps):
- out.write(formatResult(id,beta,betaSD,ts,ps))
- def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
- # printOutHead()
+ info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma))
+
res = []
# Set up the pool
@@ -106,26 +100,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
p = mp.Pool(numThreads, f_init, [q])
collect = []
- # Buffers for pvalues and t-stats
- # PS = []
- # TS = []
count = 0
job = 0
jobs_running = 0
+ jobs_completed = 0
for snp in G:
snp_id = (snp,'SNPID')
count += 1
if count % 1000 == 0:
job += 1
- if verbose:
- sys.stderr.write("Job %d At SNP %d\n" % (job,count))
+ debug("Job %d At SNP %d" % (job,count))
if numThreads == 1:
- print "Running on 1 THREAD"
+ debug("Running on 1 THREAD")
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
else:
p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
@@ -134,8 +126,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
while jobs_running > cpu_num:
try:
j,lst = q.get_nowait()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
jobs_running -= 1
except Queue.Empty:
@@ -150,24 +143,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
if numThreads==1 or count<1000 or len(collect)>0:
job += 1
- print "Collect final batch size %i job %i @%i: " % (len(collect), job, count)
+ debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count))
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
res.append((j,lst))
- print "count=",count," running=",jobs_running," collect=",len(collect)
+ debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect)))
for job in range(jobs_running):
j,lst = q.get(True,15) # time out
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
- print "Before sort",[res1[0] for res1 in res]
+ mprint("Before sort",[res1[0] for res1 in res])
res = sorted(res,key=lambda x: x[0])
- # if verbose:
- # print "res=",res[0][0:10]
- print "After sort",[res1[0] for res1 in res]
- print [len(res1[1]) for res1 in res]
+ mprint("After sort",[res1[0] for res1 in res])
+ info([len(res1[1]) for res1 in res])
ts = [item[0] for j,res1 in res for item in res1]
ps = [item[1] for j,res1 in res for item in res1]
return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 0c43587e..1c157fd8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -28,17 +28,30 @@ import time
from optmatrix import matrix_initialize, matrixMultT
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
+except ImportError:
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
def kinship_full(G):
"""
Calculate the Kinship matrix using a full dot multiplication
"""
- print G.shape
+ # mprint("kinship_full G",G)
m = G.shape[0] # snps
n = G.shape[1] # inds
- sys.stderr.write(str(m)+" SNPs\n")
- assert m>n, "n should be larger than m (snps>inds)"
- m = np.dot(G.T,G)
+ info("%d SNPs",m)
+ assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n)
+ # m = np.dot(G.T,G)
+ m = matrixMultT(G.T)
m = m/G.shape[0]
+ # mprint("kinship_full K",m)
return m
def compute_W(job,G,n,snps,compute_size):
@@ -74,46 +87,38 @@ def f_init(q):
# Calculate the kinship matrix from G (SNPs as rows!), returns K
#
-def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
- numThreads = None
- if numThreads:
- numThreads = int(numThreads)
+def kinship(G,computeSize=1000,numThreads=None,useBLAS=False):
+
matrix_initialize(useBLAS)
-
- sys.stderr.write(str(G.shape)+"\n")
+
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
- assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
+ info("%i SNPs" % (m))
+ assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
q = mp.Queue()
p = mp.Pool(numThreads, f_init, [q])
cpu_num = mp.cpu_count()
- print "CPU cores:",cpu_num
- print snps,computeSize
+ info("CPU cores: %i" % cpu_num)
iterations = snps/computeSize+1
- # if testing:
- # iterations = 8
- # jobs = range(0,8) # range(0,iterations)
results = []
-
K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals
completed = 0
for job in range(iterations):
- if verbose:
- sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize)))
+ info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize)))
W = compute_W(job,G,n,snps,computeSize)
if numThreads == 1:
# Single-core
compute_matrixMult(job,W,q)
j,x = q.get()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ progress("kinship",j,iterations)
K_j = x
- # print j,K_j[:,0]
K = K + K_j
else:
# Multi-core
@@ -123,52 +128,38 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
time.sleep(0.1)
try:
j,x = q.get_nowait()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
except Queue.Empty:
pass
if numThreads == None or numThreads > 1:
- # results contains the growing result set
for job in range(len(results)-completed):
j,x = q.get(True,15)
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
K = K / float(snps)
-
- # outFile = 'runtest.kin'
- # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
- # np.savetxt(outFile,K)
-
- # if saveKvaKve:
- # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
- # Kva,Kve = linalg.eigh(K)
- # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
- # np.savetxt(outFile+".kva",Kva)
- # np.savetxt(outFile+".kve",Kve)
return K
-def kvakve(K, verbose=True):
+def kvakve(K):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
"""
- if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-
+ info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
Kva,Kve = linalg.eigh(K)
- if verbose:
- print("Kva is: ", Kva.shape, Kva)
- print("Kve is: ", Kve.shape, Kve)
+ mprint("Kva",Kva)
+ mprint("Kve",Kve)
- if sum(Kva < 1e-6):
- if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0)))
+ if sum(Kva < 0):
+ info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0)))
Kva[Kva < 1e-6] = 1e-6
return Kva,Kve
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 53689071..2a0c7fdc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,49 +19,59 @@ from __future__ import absolute_import, print_function, division
import sys
import time
-import argparse
import uuid
import numpy as np
from scipy import linalg
from scipy import optimize
from scipy import stats
-import pdb
+# import pdb
-import simplejson as json
-
-import gzip
-import zlib
+# import gzip
+# import zlib
import datetime
-import cPickle as pickle
-import simplejson as json
-
+# import cPickle as pickle
from pprint import pformat as pf
-from redis import Redis
-Redis = Redis()
-
-import sys
-sys.path.append("/home/zas1024/gene/wqflask/")
-
-has_gn2=True
-
-from utility.benchmark import Bench
-from utility import temp_data
-
-sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/")
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+ sys.path.insert(1,cwd)
+# pylmm imports
from kinship import kinship, kinship_full, kvakve
import genotype
import phenotype
import gwas
+from benchmark import Bench
+# The following imports are for exchanging data with the webserver
+import simplejson as json
+from redis import Redis
+Redis = Redis()
+import temp_data
+
+has_gn2=None
+
+# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm file is "+__file__+"\n")
+
+# ---- A trick to decide on the environment:
try:
- from wqflask.my_pylmm.pyLMM import chunks
+ sys.stderr.write("INFO: lmm try loading module\n")
+ import utility.formatting # this is never used, just to check the environment
+ sys.stderr.write("INFO: This is a genenetwork2 environment\n")
+ from gn2 import uses, progress_set_func
+ has_gn2=True
except ImportError:
- print("WARNING: Standalone version missing the Genenetwork2 environment\n")
+ # Failed to load gn2
has_gn2=False
- pass
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+ sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
#np.seterr('raise')
@@ -76,8 +86,7 @@ def run_human(pheno_vector,
covariate_matrix,
plink_input_file,
kinship_matrix,
- refit=False,
- tempdata=None):
+ refit=False):
v = np.isnan(pheno_vector)
keep = True - v
@@ -169,10 +178,7 @@ def run_human(pheno_vector,
#if count > 1000:
# break
count += 1
-
- percent_complete = (float(count) / total_snps) * 100
- #print("percent_complete: ", percent_complete)
- tempdata.store("percent_complete", percent_complete)
+ progress("human",count,total_snps)
#with Bench("actual association"):
ps, ts = human_association(snp,
@@ -260,23 +266,19 @@ def human_association(snp,
def run_other_old(pheno_vector,
genotype_matrix,
restricted_max_likelihood=True,
- refit=False,
- tempdata=None # <---- can not be None
- ):
+ refit=False):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
- temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -292,27 +294,22 @@ def run_other_old(pheno_vector,
with Bench("Doing GWAS"):
t_stats, p_values = GWAS(pheno_vector,
- genotype_matrix,
+ genotype_matrix.T,
kinship_matrix,
restricted_max_likelihood=True,
- refit=False,
- temp_data=tempdata)
+ refit=False)
Bench().report()
return p_values, t_stats
-def run_other_new(pheno_vector,
- genotype_matrix,
+def run_other_new(n,m,pheno_vector,
+ geno,
restricted_max_likelihood=True,
- refit=False,
- tempdata=None # <---- can not be None
- ):
+ refit=False):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
- temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
@@ -320,8 +317,7 @@ def run_other_new(pheno_vector,
print("REML=",restricted_max_likelihood," REFIT=",refit)
# Adjust phenotypes
- Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
- print("Removed missing phenotypes",Y.shape)
+ n,Y,keep = phenotype.remove_missing_new(n,pheno_vector)
# if options.maf_normalization:
# G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
@@ -329,8 +325,9 @@ def run_other_new(pheno_vector,
# if not options.skip_genotype_normalization:
# G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+ geno = geno[:,keep]
with Bench("Calculate Kinship"):
- K,G = calculate_kinship(G, tempdata)
+ K,G = calculate_kinship_new(geno)
print("kinship_matrix: ", pf(K))
print("kinship_matrix.shape: ", pf(K.shape))
@@ -345,7 +342,7 @@ def run_other_new(pheno_vector,
with Bench("Doing GWAS"):
t_stats, p_values = gwas.gwas(Y,
- G.T,
+ G,
K,
restricted_max_likelihood=True,
refit=False,verbose=True)
@@ -385,18 +382,30 @@ def matrixMult(A,B):
return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
-
-def calculate_kinship_new(genotype_matrix, temp_data=None):
+def calculate_kinship_new(genotype_matrix):
+ """
+ Call the new kinship calculation where genotype_matrix contains
+ inds (columns) by snps (rows).
+ """
+ assert type(genotype_matrix) is np.ndarray
+ info("call genotype.normalize")
+ G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix)
+ mprint("G",genotype_matrix)
+ info("call calculate_kinship_new")
+ return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME)
+
+def calculate_kinship_iter(geno):
"""
Call the new kinship calculation where genotype_matrix contains
inds (columns) by snps (rows).
"""
- print("call genotype.normalize")
+ assert type(genotype_matrix) is iter
+ info("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
- print("call calculate_kinship_new")
- return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
+ info("call calculate_kinship_new")
+ return kinship(G)
-def calculate_kinship_old(genotype_matrix, temp_data=None):
+def calculate_kinship_old(genotype_matrix):
"""
genotype_matrix is an n x m matrix encoding SNP minor alleles.
@@ -404,13 +413,15 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
normalizes the resulting vectors and returns the RRM matrix.
"""
- print("call calculate_kinship_old")
+ info("call calculate_kinship_old")
+ fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog")
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- print("genotype 2D matrix n (inds) is:", n)
- print("genotype 2D matrix m (snps) is:", m)
+ info("genotype 2D matrix n (inds) is: %d" % (n))
+ info("genotype 2D matrix m (snps) is: %d" % (m))
assert m>n, "n should be larger than m (snps>inds)"
keep = []
+ mprint("G (before old normalize)",genotype_matrix)
for counter in range(m):
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
#Checks if any values in column are not numbers
@@ -429,17 +440,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
continue
keep.append(counter)
genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
-
- percent_complete = int(round((counter/m)*45))
- if temp_data != None:
- temp_data.store("percent_complete", percent_complete)
+ progress('kinship_old normalize genotype',counter,m)
genotype_matrix = genotype_matrix[:,keep]
- print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
+ mprint("G (after old normalize)",genotype_matrix.T)
kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
return kinship_matrix,genotype_matrix
-
-calculate_kinship = calculate_kinship_new # alias
+ # return kinship_full(genotype_matrix.T),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
@@ -465,9 +472,9 @@ def GWAS(pheno_vector,
refit - refit the variance component for each SNP
"""
- if kinship_eigen_vals == None:
+ if kinship_eigen_vals is None:
kinship_eigen_vals = []
- if kinship_eigen_vectors == None:
+ if kinship_eigen_vectors is None:
kinship_eigen_vectors = []
n = genotype_matrix.shape[0]
@@ -537,9 +544,8 @@ def GWAS(pheno_vector,
lmm_ob.fit(X=x)
ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)
- percent_complete = 45 + int(round((counter/m)*55))
- temp_data.store("percent_complete", percent_complete)
-
+ progress("gwas_old",counter,m)
+
p_values.append(ps)
t_statistics.append(ts)
@@ -572,7 +578,7 @@ class LMM:
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
"""
- if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+ if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
#x = Y != -9
@@ -665,7 +671,7 @@ class LMM:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None:
+ if X is None:
X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -804,48 +810,96 @@ class LMM:
pl.ylabel("Probability of data")
pl.title(title)
-
-def gn2_redis(key,species,new_code=True):
+def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True):
"""
- Invoke pylmm using Redis as a container. new_code runs the new
- version
+ Invoke pylmm using genotype as a matrix or as a (SNP) iterator.
"""
- json_params = Redis.get(key)
-
- params = json.loads(json_params)
-
- tempdata = temp_data.TempData(params['temp_uuid'])
-
-
- print('pheno', np.array(params['pheno_vector']))
+ info("run_gwas")
+ print('pheno', y)
if species == "human" :
- print('kinship', np.array(params['kinship_matrix']))
- ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
- covariate_matrix = np.array(params['covariate_matrix']),
- plink_input_file = params['input_file_name'],
- kinship_matrix = np.array(params['kinship_matrix']),
- refit = params['refit'],
- tempdata = tempdata)
+ print('kinship', k )
+ ps, ts = run_human(pheno_vector = y,
+ covariate_matrix = cov,
+ plink_input_file = inputfn,
+ kinship_matrix = k,
+ refit = refit)
else:
- geno = np.array(params['genotype_matrix'])
print('geno', geno.shape, geno)
if new_code:
- ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']),
- genotype_matrix = geno,
- restricted_max_likelihood = params['restricted_max_likelihood'],
- refit = params['refit'],
- tempdata = tempdata)
+ ps, ts = run_other_new(n,m,pheno_vector = y,
+ geno = geno,
+ restricted_max_likelihood = reml,
+ refit = refit)
else:
- ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']),
+ ps, ts = run_other_old(pheno_vector = y,
genotype_matrix = geno,
- restricted_max_likelihood = params['restricted_max_likelihood'],
- refit = params['refit'],
- tempdata = tempdata)
+ restricted_max_likelihood = reml,
+ refit = refit)
+ return ps,ts
+
+def gwas_with_redis(key,species,new_code=True):
+ """
+ Invoke pylmm using Redis as a container. new_code runs the new
+ version. All the Redis code goes here!
+ """
+ json_params = Redis.get(key)
+
+ params = json.loads(json_params)
+
+ tempdata = temp_data.TempData(params['temp_uuid'])
+ def update_tempdata(loc,i,total):
+ """
+ This is the single method that updates Redis for percentage complete!
+ """
+ tempdata.store("percent_complete",round(i*100.0/total))
+ debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
+ progress_set_func(update_tempdata)
+
+ def narray(t):
+ info("Type is "+t)
+ v = params.get(t)
+ if v is not None:
+ # Note input values can be array of string or float
+ v1 = [x if x != 'NA' else 'nan' for x in v]
+ v = np.array(v1).astype(np.float)
+ return v
+
+ def marray(t):
+ info("Type is "+t)
+ v = params.get(t)
+ if v is not None:
+ m = []
+ for r in v:
+ # Note input values can be array of string or float
+ r1 = [x if x != 'NA' else 'nan' for x in r]
+ m.append(np.array(r1).astype(np.float))
+ return np.array(m)
+ return np.array(v)
+
+ def marrayT(t):
+ m = marray(t)
+ if m is not None:
+ return m.T
+ return m
+
+ # We are transposing before we enter run_gwas - this should happen on the webserver
+ # side (or when reading data from file)
+ k = marray('kinship_matrix')
+ g = marrayT('genotype_matrix')
+ mprint("geno",g)
+ y = narray('pheno_vector')
+ n = len(y)
+ m = params.get('num_genotypes')
+ if m is None:
+ m = g.shape[0]
+ info("m=%d,n=%d" % (m,n))
+ ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code)
results_key = "pylmm:results:" + params['temp_uuid']
+ # fatal(results_key)
json_results = json.dumps(dict(p_values = ps,
t_stats = ts))
@@ -854,28 +908,56 @@ def gn2_redis(key,species,new_code=True):
Redis.expire(results_key, 60*60)
return ps, ts
-# This is the main function used by Genenetwork2 (with environment)
-def gn2_main():
- parser = argparse.ArgumentParser(description='Run pyLMM')
- parser.add_argument('-k', '--key')
- parser.add_argument('-s', '--species')
-
- opts = parser.parse_args()
-
- key = opts.key
- species = opts.species
+def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
+ """
+ This function emulates current GN2 behaviour by pre-loading Redis (note the input
+ genotype is transposed to emulate GN2 (FIXME!)
+ """
+ info("Loading Redis from parsed data")
+ if kinship == None:
+ k = None
+ else:
+ k = kinship.tolist()
+ params = dict(pheno_vector = pheno.tolist(),
+ genotype_matrix = geno.T.tolist(),
+ num_genotypes = geno.shape[0],
+ kinship_matrix = k,
+ covariate_matrix = None,
+ input_file_name = None,
+ restricted_max_likelihood = True,
+ refit = False,
+ temp_uuid = "testrun_temp_uuid",
+
+ # meta data
+ timestamp = datetime.datetime.now().isoformat())
+
+ json_params = json.dumps(params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
- gn2_redis(key,species)
+ return gwas_with_redis(key,species,new_code)
-def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
- print("Loading Redis from parsed data")
+def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
+ """
+ This function emulates GN2 behaviour by pre-loading Redis with
+ a SNP iterator, for this it sets a key for every genotype (SNP)
+ """
+ print("Loading Redis using a SNP iterator")
+ for i,genotypes in enumerate(geno_iterator):
+ gkey = key+'_geno_'+str(i)
+ Redis.set(gkey, genotypes)
+ Redis.expire(gkey, 60*60)
+
if kinship == None:
k = None
else:
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
- genotype_matrix = geno.tolist(),
- kinship_matrix= k,
+ genotype_matrix = "iterator",
+ num_genotypes = i,
+ kinship_matrix = k,
+ covariate_matrix = None,
+ input_file_name = None,
restricted_max_likelihood = True,
refit = False,
temp_uuid = "testrun_temp_uuid",
@@ -887,14 +969,27 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
json_params = json.dumps(params)
Redis.set(key, json_params)
Redis.expire(key, 60*60)
+ return gwas_with_redis(key,species)
- return gn2_redis(key,species,new_code)
+# This is the main function used by Genenetwork2 (with environment)
+#
+# Note that this calling route will become OBSOLETE (we should use runlmm.py
+# instead)
+def gn2_main():
+ import argparse
+ parser = argparse.ArgumentParser(description='Run pyLMM')
+ parser.add_argument('-k', '--key')
+ parser.add_argument('-s', '--species')
+ opts = parser.parse_args()
+
+ key = opts.key
+ species = opts.species
+
+ gwas_with_redis(key,species)
+
+
if __name__ == '__main__':
print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
- if has_gn2:
- gn2_main()
- else:
- print("Run from runlmm.py instead")
-
+ gn2_main()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d4b3ac82..d871d8d2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,6 +24,25 @@ from scipy import optimize
from optmatrix import matrixMult
import kinship
+sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n")
+
+# ---- A trick to decide on the environment:
+try:
+ sys.stderr.write("INFO: lmm2 try loading module\n")
+ import utility.formatting # this is never used, just to check the environment
+ sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n")
+ from gn2 import uses, progress_set_func
+except ImportError:
+ # Failed to load gn2
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+ sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
def calculateKinship(W,center=False):
"""
W is an n x m matrix encoding SNP minor alleles.
@@ -75,7 +94,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
print("genotype matrix n is:", n)
print("genotype matrix m is:", m)
- if X0 == None:
+ if X0 is None:
X0 = np.ones((n,1))
# Remove missing values in Y and adjust associated parameters
@@ -139,31 +158,35 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
class LMM2:
- """
- This is a simple version of EMMA/fastLMM.
- The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K)
- and to optimize this model by finding the maximum-likelihood estimates for the model parameters.
- There are three model parameters: heritability (h), covariate coefficients (beta) and the total
- phenotypic variance (sigma).
- Heritability as defined here is the proportion of the total variance (sigma) that is attributed to
- the kinship matrix.
-
- For simplicity, we assume that everything being input is a numpy array.
- If this is not the case, the module may throw an error as conversion from list to numpy array
- is not done consistently.
+ """This is a simple version of EMMA/fastLMM.
+
+ The main purpose of this module is to take a phenotype vector (Y),
+ a set of covariates (X) and a kinship matrix (K) and to optimize
+ this model by finding the maximum-likelihood estimates for the
+ model parameters. There are three model parameters: heritability
+ (h), covariate coefficients (beta) and the total phenotypic
+ variance (sigma). Heritability as defined here is the proportion
+ of the total variance (sigma) that is attributed to the kinship
+ matrix.
+
+ For simplicity, we assume that everything being input is a numpy
+ array. If this is not the case, the module may throw an error as
+ conversion from list to numpy array is not done consistently.
"""
def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False):
- """
- The constructor takes a phenotype vector or array Y of size n.
- It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
- If they are not provided, the constructor will calculate them.
- X0 is an optional covariate matrix of size n x q, where there are q covariates.
- When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+ """The constructor takes a phenotype vector or array Y of size n. It
+ takes a kinship matrix K of size n x n. Kva and Kve can be
+ computed as Kva,Kve = linalg.eigh(K) and cached. If they are
+ not provided, the constructor will calculate them. X0 is an
+ optional covariate matrix of size n x q, where there are q
+ covariates. When this parameter is not provided, the
+ constructor will set X0 to an n x 1 matrix of all ones to
+ represent a mean effect.
"""
- if X0 == None:
+ if X0 is None:
X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
@@ -250,7 +273,7 @@ class LMM2:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None: X = self.X0t
+ if X is None: X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
X = self.X0t_stack
@@ -306,7 +329,7 @@ class LMM2:
Given this optimum, the function computes the LL and associated ML solutions.
"""
- if X == None: X = self.X0t
+ if X is None: X = self.X0t
else:
#X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -330,7 +353,7 @@ class LMM2:
def association(self,X,h=None,stack=True,REML=True,returnBeta=False):
"""
Calculates association statitics for the SNPs encoded in the vector X of size n.
- If h == None, the optimal h stored in optH is used.
+ If h is None, the optimal h stored in optH is used.
"""
if False:
@@ -348,7 +371,7 @@ class LMM2:
self.X0t_stack[:,(self.q)] = m
X = self.X0t_stack
- if h == None: h = self.optH
+ if h is None: h = self.optH
L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
q = len(beta)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index 682ba371..7b652515 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -19,22 +19,47 @@
import sys
import numpy as np
-def remove_missing(y,g,verbose=False):
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
+except ImportError:
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
+def remove_missing(n,y,g):
"""
Remove missing data from matrices, make sure the genotype data has
individuals as rows
"""
- assert(y!=None)
- assert(y.shape[0] == g.shape[0])
+ assert(y is not None)
+ assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape)
y1 = y
g1 = g
v = np.isnan(y)
keep = True - v
if v.sum():
- if verbose:
- sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
+ info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
y1 = y[keep]
g1 = g[keep,:]
- return y1,g1,keep
+ n = y1.shape[0]
+ return n,y1,g1,keep
+
+def remove_missing_new(n,y):
+ """
+ Remove missing data. Returns new n,y,keep
+ """
+ assert(y is not None)
+ y1 = y
+ v = np.isnan(y)
+ keep = True - v
+ if v.sum():
+ info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum()))
+ y1 = y[keep]
+ n = y1.shape[0]
+ return n,y1,keep
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 324c4f2c..6b241cd6 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,10 +21,21 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_old
+
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+ sys.path.insert(1,cwd)
+
+# pylmm modules
+from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas
from kinship import kinship, kinship_full
import genotype
import phenotype
+from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
usage = """
python runlmm.py [options] command
@@ -98,44 +109,64 @@ if options.pheno:
y = tsvreader.pheno(options.pheno)
print y.shape
-if options.geno:
+if options.geno and cmd != 'iterator':
g = tsvreader.geno(options.geno)
print g.shape
-if cmd == 'redis_new':
- # The main difference between redis_new and redis is that missing
- # phenotypes are handled by the first
- Y = y
- G = g
- print "Original G",G.shape, "\n", G
-
- gt = G.T
- G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
+def check_results(ps,ts):
print np.array(ps)
print len(ps),sum(ps)
- # Test results
p1 = round(ps[0],4)
p2 = round(ps[-1],4)
- sys.stderr.write(options.geno+"\n")
if options.geno == 'data/small.geno':
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
+ info("Validating results for "+options.geno)
+ assert p1==0.7387, "p1=%f" % p1
+ assert p2==0.7387, "p2=%f" % p2
if options.geno == 'data/small_na.geno':
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
+ info("Validating results for "+options.geno)
+ assert p1==0.062, "p1=%f" % p1
+ assert p2==0.062, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
- # assert p1==0.8984, "p1=%f" % p1
- # assert p2==0.9621, "p2=%f" % p2
+ info("Validating results for "+options.geno)
assert round(sum(ps)) == 4070
assert len(ps) == 8000
+ info("Run completed")
+
+if y is not None:
+ n = y.shape[0]
+
+if cmd == 'run':
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ n = len(y)
+ m = g.shape[1]
+ ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP
+ check_results(ps,ts)
+elif cmd == 'iterator':
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ geno_iterator = tsvreader.geno_iter(options.geno)
+ ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator)
+ check_results(ps,ts)
+elif cmd == 'redis_new':
+ # The main difference between redis_new and redis is that missing
+ # phenotypes are handled by the first
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ Y = y
+ G = g
+ print "Original G",G.shape, "\n", G
+ # gt = G.T
+ # G = None
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True)
+ check_results(ps,ts)
elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
print "Original G",G.shape, "\n", G
- if y != None and options.remove_missing_phenotypes:
+ if y is not None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
+ n,Y,g,keep = phenotype.remove_missing(n,y,gnt)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
else:
@@ -148,30 +179,16 @@ elif cmd == 'redis':
g = None
gnt = None
- gt = G.T
- G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
- print np.array(ps)
- print len(ps),sum(ps)
- # Test results 4070.02346579
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
- sys.stderr.write(options.geno+"\n")
- if options.geno == 'data/small.geno':
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
- if options.geno == 'data/small_na.geno':
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
- if options.geno == 'data/test8000.geno':
- assert round(sum(ps)) == 4070
- assert len(ps) == 8000
+ # gt = G.T
+ # G = None
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False)
+ check_results(ps,ts)
elif cmd == 'kinship':
G = g
print "Original G",G.shape, "\n", G
if y != None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+ n,Y,g,keep = phenotype.remove_missing(n,y,g.T)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
if options.maf_normalization:
@@ -187,20 +204,20 @@ elif cmd == 'kinship':
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
- K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
+ K2,G = calculate_kinship_new(np.copy(G))
print "Genotype",G.shape, "\n", G
print "GN2 Kinship method",K2.shape,"\n",K2
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(G.T)
+ K3 = kinship(G)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
if options.geno == 'data/small.geno':
- assert k1==0.8, "k1=%f" % k1
- assert k2==0.7939, "k2=%f" % k2
- assert k3==0.7939, "k3=%f" % k3
+ assert k1==0.8333, "k1=%f" % k1
+ assert k2==0.9375, "k2=%f" % k2
+ assert k3==0.9375, "k3=%f" % k3
if options.geno == 'data/small_na.geno':
assert k1==0.8333, "k1=%f" % k1
assert k2==0.7172, "k2=%f" % k2
@@ -209,4 +226,4 @@ elif cmd == 'kinship':
assert k3==1.4352, "k3=%f" % k3
else:
- print "Doing nothing"
+ fatal("Doing nothing")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
new file mode 100644
index 00000000..40b2021d
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -0,0 +1,110 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None
+progress_current = None
+progress_prev_perc = None
+
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
+
+def progress(location, count, total):
+ global progress_location
+ global progress_prev_perc
+
+ perc = round(count*100.0/total)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ progress_location = location
+ progress_prev_perc = perc
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def fatal(msg):
+ logger.critical(msg)
+ raise Exception(msg)
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
+ fatal = fatal,
+ progress = progress,
+ mprint = mprint
+ )
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
new file mode 100644
index 00000000..004d45c6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
@@ -0,0 +1,25 @@
+from __future__ import print_function, division, absolute_import
+from redis import Redis
+
+import simplejson as json
+
+class TempData(object):
+
+ def __init__(self, temp_uuid):
+ self.temp_uuid = temp_uuid
+ self.redis = Redis()
+ self.key = "tempdata:{}".format(self.temp_uuid)
+
+ def store(self, field, value):
+ self.redis.hset(self.key, field, value)
+ self.redis.expire(self.key, 60*15) # Expire in 15 minutes
+
+ def get_all(self):
+ return self.redis.hgetall(self.key)
+
+
+if __name__ == "__main__":
+ redis = Redis()
+ for key in redis.keys():
+ for field in redis.hkeys(key):
+ print("{}.{}={}".format(key, field, redis.hget(key, field)))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..66b34ee2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -56,7 +56,8 @@ def geno(fn):
print fn
with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ line = tsvin.readline().strip()
+ assert line == "# Genotype format version 1.0", line
tsvin.readline()
tsvin.readline()
tsvin.readline()
@@ -74,3 +75,48 @@ def geno(fn):
G = np.array(G1)
return G
+def geno(fn):
+ G1 = []
+ for id,values in geno_iter(fn):
+ G1.append(values) # <--- slow
+ G = np.array(G1)
+ return G
+
+def geno_callback(fn,func):
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ func(id,gs2)
+
+def geno_iter(fn):
+ """
+ Yield a tuple of snpid and values
+ """
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ yield (id,gs2)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
index a87d3537..881ea74d 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
@@ -79,8 +79,8 @@ open_mapping_results = (data) ->
$.colorbox(
html: data
href: "#mapping_results_holder"
- height: "80%"
- width: "80%"
+ height: "90%"
+ width: "90%"
)
showalert = (message,alerttype) ->
@@ -154,8 +154,8 @@ $("#plink_compute").click(() =>
$("#static_progress_bar_container").modal()
url = "/marker_regression"
$('input[name=method]').val("plink")
- $('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val())
- $('input[name=suggestive]').val($('input[name=suggestive_plink]').val())
+ #$('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val())
+ #$('input[name=suggestive]').val($('input[name=suggestive_plink]').val())
$('input[name=maf]').val($('input[name=maf_plink]').val())
form_data = $('#trait_data_form').serialize()
console.log("form_data is:", form_data)
@@ -164,11 +164,12 @@ $("#plink_compute").click(() =>
)
$("#gemma_compute").click(() =>
+ console.log("RUNNING GEMMA")
$("#static_progress_bar_container").modal()
url = "/marker_regression"
$('input[name=method]').val("gemma")
- $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val())
- $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val())
+ #$('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val())
+ #$('input[name=suggestive]').val($('input[name=suggestive_gemma]').val())
$('input[name=maf]').val($('input[name=maf_gemma]').val())
form_data = $('#trait_data_form').serialize()
console.log("form_data is:", form_data)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
index c8988cdc..1779df4b 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
@@ -99,8 +99,8 @@ open_mapping_results = function(data) {
return $.colorbox({
html: data,
href: "#mapping_results_holder",
- height: "80%",
- width: "80%"
+ height: "90%",
+ width: "90%"
});
};
@@ -187,11 +187,10 @@ $("#plink_compute").click((function(_this) {
$("#gemma_compute").click((function(_this) {
return function() {
var form_data, url;
+ console.log("RUNNING GEMMA");
$("#static_progress_bar_container").modal();
url = "/marker_regression";
$('input[name=method]').val("gemma");
- $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val());
- $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val());
$('input[name=maf]').val($('input[name=maf_gemma]').val());
form_data = $('#trait_data_form').serialize();
console.log("form_data is:", form_data);
diff --git a/wqflask/wqflask/templates/pair_scan_results.html b/wqflask/wqflask/templates/pair_scan_results.html
index 869dabed..f46d7cbf 100644
--- a/wqflask/wqflask/templates/pair_scan_results.html
+++ b/wqflask/wqflask/templates/pair_scan_results.html
@@ -33,6 +33,28 @@
<h2>
Results
</h2>
+ <table cellpadding="0" cellspacing="0" border="0" id="pair_scan_results" class="table table-hover table-striped table-bordered">
+ <thead>
+ <tr>
+ <td>Index</td>
+ <td>Locus</td>
+ <td>Chr 1</td>
+ <td>Mb</td>
+ <td>Chr 2</td>
+ </tr>
+ </thead>
+ <tbody>
+ {% for marker in pair_scan_results %}
+ <tr>
+ <td>{{loop.index}}</td>
+ <td>{{marker.name}}</td>
+ <td>{{marker.chr1}}</td>
+ <td>{{marker.Mb}}</td>
+ <td>{{marker.chr2}}</td>
+ </tr>
+ {% endfor %}
+ </tbody>
+ </table>
</div>
</div>
diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html
index 731f6fbd..c7c2a62f 100755
--- a/wqflask/wqflask/templates/search_result_page.html
+++ b/wqflask/wqflask/templates/search_result_page.html
@@ -42,7 +42,7 @@
<button class="btn btn-default" id="deselect_all"><span class="glyphicon glyphicon-remove"></span> Deselect All</button>
<button class="btn btn-default" id="invert"><span class="glyphicon glyphicon-resize-vertical"></span> Invert</button>
<button class="btn btn-default" id="add"><span class="glyphicon glyphicon-plus-sign"></span> Add</button>
- <button class="btn btn-primary pull-right"><span class="glyphicon glyphicon-download"></span> Download Table</button>
+ <button class="btn btn-primary"><span class="glyphicon glyphicon-download"></span> Download Table</button>
<br />
<br />
<table class="table table-hover table-striped" id='trait_table'>
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index 426a5c5d..27504e51 100755
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -217,16 +217,16 @@
<div style="padding: 20px" class="form-horizontal">
<div class="mapping_method_fields form-group">
<label for="maf_plink" class="col-xs-2 control-label">Minor allele threshold</label>
- <div style="margin-left: 20px;" class="col-xs-1 controls">
+ <div style="margin-left: 20px;" class="col-xs-2 controls">
<input name="maf_plink" value="0.01" type="text" class="form-control">
</div>
</div>
</div>
<div class="form-group">
- <label for="gemma_compute" class="col-xs-1 control-label"></label>
+ <label for="plink_compute" class="col-xs-1 control-label"></label>
<div style="margin-left:20px;" class="col-xs-4 controls">
- <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
+ <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
Compute
</button>
</div>
@@ -237,16 +237,16 @@
<div style="padding: 20px" class="form-horizontal">
<div class="mapping_method_fields form-group">
<label for="maf_gemma" class="col-xs-2 control-label">Minor allele threshold</label>
- <div style="margin-left: 20px;" class="col-xs-1 controls">
+ <div style="margin-left: 20px;" class="col-xs-2 controls">
<input name="maf_gemma" value="0.01" type="text" class="form-control">
</div>
</div>
</div>
<div class="form-group">
- <label for="plink_compute" class="col-xs-1 control-label"></label>
+ <label for="gemma_compute" class="col-xs-1 control-label"></label>
<div style="margin-left:20px;" class="col-xs-4 controls">
- <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
+ <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
Compute
</button>
</div>