aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/runserver.py7
-rwxr-xr-xwqflask/wqflask/interval_mapping/interval_mapping.py22
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py44
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py62
-rwxr-xr-xwqflask/wqflask/model.py2
-rw-r--r--wqflask/wqflask/my_pylmm/README.md52
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/__init__.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/benchmark.py44
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py10
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/genotype.py19
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gn2.py110
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gwas.py80
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/kinship.py83
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py341
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm2.py71
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/phenotype.py37
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py111
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/standalone.py110
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/temp_data.py25
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py48
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee13
-rwxr-xr-xwqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js7
-rw-r--r--wqflask/wqflask/templates/pair_scan_results.html22
-rwxr-xr-xwqflask/wqflask/templates/search_result_page.html2
-rwxr-xr-xwqflask/wqflask/templates/show_trait_mapping_tools.html12
25 files changed, 956 insertions, 380 deletions
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 9d5686a9..5a76d1e2 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -18,9 +18,10 @@ from wqflask import app
#_ch = logging.StreamHandler()
#_log.addHandler(_ch)
+print app.config
+
import logging
-#from themodule import TheHandlerYouWant
-file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure")
+file_handler = logging.FileHandler(app.config['LOGFILE'])
file_handler.setLevel(logging.DEBUG)
app.logger.addHandler(file_handler)
@@ -28,7 +29,7 @@ import logging_tree
logging_tree.printout()
app.run(host='0.0.0.0',
- port=5003,
+ port=app.config['SERVER_PORT'],
use_debugger=False,
threaded=True,
use_reloader=True)
diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py
index 5eb901f7..5511826a 100755
--- a/wqflask/wqflask/interval_mapping/interval_mapping.py
+++ b/wqflask/wqflask/interval_mapping/interval_mapping.py
@@ -57,13 +57,8 @@ class IntervalMapping(object):
self.set_options(start_vars)
self.json_data = {}
-
- #if self.method == "qtl_reaper":
self.json_data['lodnames'] = ['lod.hk']
self.gen_reaper_results(tempdata)
- #else:
- # self.gen_pylmm_results(tempdata)
- #self.gen_qtl_results(tempdata)
#Get chromosome lengths for drawing the interval map plot
chromosome_mb_lengths = {}
@@ -93,13 +88,7 @@ class IntervalMapping(object):
def set_options(self, start_vars):
"""Sets various options (physical/genetic mapping, # permutations, which chromosome"""
- #self.plot_scale = start_vars['scale']
- #if self.plotScale == 'physic' and not fd.genotype.Mbmap:
- # self.plotScale = 'morgan'
- #self.method = start_vars['mapping_method']
self.num_permutations = int(start_vars['num_perm'])
- #self.do_bootstrap = start_vars['do_bootstrap']
- #self.selected_chr = start_vars['chromosome']
if start_vars['manhattan_plot'] == "true":
self.manhattan_plot = True
else:
@@ -112,9 +101,6 @@ class IntervalMapping(object):
self.control_locus = start_vars['control_locus']
else:
self.control_locus = None
- #self.weighted_regression = start_vars['weighted']
- #self.lrs_lod = start_vars['lrs_lod']
-
def gen_qtl_results(self, tempdata):
"""Generates qtl results for plotting interval map"""
@@ -134,7 +120,7 @@ class IntervalMapping(object):
#if self.weighted_regression:
# self.lrs_array = self.genotype.permutation(strains = trimmed_samples,
# trait = trimmed_values,
- # variance = _vars,
+ # variance = variances,
# nperm=self.num_permutations)
#else:
self.lrs_array = genotype.permutation(strains = trimmed_samples,
@@ -175,12 +161,6 @@ class IntervalMapping(object):
self.json_data['markernames'] = []
for qtl in reaper_results:
reaper_locus = qtl.locus
- #if reaper_locus.chr == "20":
- # print("changing to X")
- # self.json_data['chr'].append("X")
- #else:
- # self.json_data['chr'].append(reaper_locus.chr)
- ##self.json_data['chr'].append(reaper_locus.chr)
self.json_data['pos'].append(reaper_locus.Mb)
self.json_data['lod.hk'].append(qtl.lrs)
self.json_data['markernames'].append(reaper_locus.name)
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
new file mode 100644
index 00000000..b1ab780c
--- /dev/null
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -0,0 +1,44 @@
+import os
+
+from base import webqtlConfig
+
+def run_gemma(this_dataset, samples, vals):
+ """Generates p-values for each marker using GEMMA"""
+
+ print("INSIDE GEMMA_MAPPING")
+
+ gen_pheno_txt_file(this_dataset, samples, vals)
+
+ os.chdir("{}gemma".format(webqtlConfig.HTMLPATH))
+
+ gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o output/%s_output' % (this_dataset.group.name,
+ this_dataset.group.name,
+ this_dataset.group.name)
+ print("gemma_command:" + gemma_command)
+
+ os.system(gemma_command)
+
+ included_markers, p_values = parse_gemma_output(this_dataset)
+
+ return included_markers, p_values
+
+def gen_pheno_txt_file(this_dataset, samples, vals):
+ """Generates phenotype file for GEMMA"""
+
+ with open("{}gemma/{}.fam".format(webqtlConfig.HTMLPATH, this_dataset.group.name), "w") as outfile:
+ for i, sample in enumerate(samples):
+ outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(vals[i]) + "\n")
+
+def parse_gemma_output(this_dataset):
+ included_markers = []
+ p_values = []
+ with open("{}gemma/output/{}_output.assoc.txt".format(webqtlConfig.HTMLPATH, this_dataset.group.name)) as output_file:
+ for line in output_file:
+ if line.startswith("chr"):
+ continue
+ else:
+ included_markers.append(line.split("\t")[1])
+ p_values.append(float(line.split("\t")[10]))
+
+ print("p_values: ", p_values)
+ return included_markers, p_values \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7708356b..bed316d7 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -25,12 +25,17 @@ from redis import Redis
Redis = Redis()
from flask import Flask, g
+from wqflask import app
from base.trait import GeneralTrait
from base import data_set
from base import species
from base import webqtlConfig
from utility import webqtlUtil
+from wqflask.marker_regression import qtl_reaper_mapping
+from wqflask.marker_regression import plink_mapping
+from wqflask.marker_regression import gemma_mapping
+from wqflask.marker_regression import rqtl_mapping
from wqflask.my_pylmm.data import prep_data
from wqflask.my_pylmm.pyLMM import lmm
from wqflask.my_pylmm.pyLMM import input
@@ -40,6 +45,14 @@ from utility import temp_data
from utility.benchmark import Bench
+import os
+if os.environ.get('PYLMM_PATH') is None:
+ PYLMM_PATH=app.config.get('PYLMM_PATH')
+ if PYLMM_PATH is None:
+ PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM'
+if not os.path.isfile(PYLMM_PATH+'/lmm.py'):
+ raise 'PYLMM_PATH unknown or faulty'
+PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py'
class MarkerRegression(object):
@@ -73,7 +86,10 @@ class MarkerRegression(object):
self.dataset.group.get_markers()
if self.mapping_method == "gemma":
- qtl_results = self.run_gemma()
+ included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
+ self.dataset.group.get_specified_markers(markers = included_markers)
+ self.dataset.group.markers.add_pvalues(p_values)
+ qtl_results = self.dataset.group.markers.markers
elif self.mapping_method == "rqtl_plink":
qtl_results = self.run_rqtl_plink()
elif self.mapping_method == "rqtl_geno":
@@ -88,9 +104,7 @@ class MarkerRegression(object):
if start_vars['pair_scan'] == "true":
self.pair_scan = True
- print("pair scan:", self.pair_scan)
- print("DOING RQTL GENO")
qtl_results = self.run_rqtl_geno()
print("qtl_results:", qtl_results)
elif self.mapping_method == "plink":
@@ -126,8 +140,9 @@ class MarkerRegression(object):
#Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
self.qtl_results = []
- for qtl in self.filtered_markers:
- print("lod score is:", qtl['lod_score'])
+ for index,qtl in enumerate(self.filtered_markers):
+ if index<40:
+ print("lod score is:", qtl['lod_score'])
if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
print("changing to X")
self.json_data['chr'].append("X")
@@ -144,7 +159,7 @@ class MarkerRegression(object):
self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
- print("json_data:", self.json_data)
+ # print("json_data:", self.json_data)
self.js_data = dict(
@@ -156,11 +171,11 @@ class MarkerRegression(object):
chromosomes = chromosome_mb_lengths,
qtl_results = self.filtered_markers,
)
+
def run_gemma(self):
"""Generates p-values for each marker using GEMMA"""
- #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name))
self.gen_pheno_txt_file()
os.chdir("/home/zas1024/gene/web/gemma")
@@ -272,7 +287,7 @@ class MarkerRegression(object):
""")
def run_rqtl_geno(self):
- print("Calling R/qtl from python")
+ print("Calling R/qtl")
self.geno_to_rqtl_function()
@@ -381,9 +396,23 @@ class MarkerRegression(object):
return pheno_as_string
def process_pair_scan_results(self, result):
- results = []
+ pair_scan_results = []
+
+ result = result[1]
+ output = [tuple([result[j][i] for j in range(result.ncol)]) for i in range(result.nrow)]
+ print("R/qtl scantwo output:", output)
+
+ for i, line in enumerate(result.iter_row()):
+ marker = {}
+ marker['name'] = result.rownames[i]
+ marker['chr1'] = output[i][0]
+ marker['Mb'] = output[i][1]
+ marker['chr2'] = int(output[i][2])
+ pair_scan_results.append(marker)
+
+ print("pair_scan_results:", pair_scan_results)
- return results
+ return pair_scan_results
def process_rqtl_results(self, result): # TODO: how to make this a one liner and not copy the stuff in a loop
qtl_results = []
@@ -655,8 +684,7 @@ class MarkerRegression(object):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- "other")
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other")
os.system(command)
@@ -713,8 +741,8 @@ class MarkerRegression(object):
# "refit": False,
# "temp_data": tempdata}
- print("genotype_matrix:", str(genotype_matrix.tolist()))
- print("pheno_vector:", str(pheno_vector.tolist()))
+ # print("genotype_matrix:", str(genotype_matrix.tolist()))
+ # print("pheno_vector:", str(pheno_vector.tolist()))
params = dict(pheno_vector = pheno_vector.tolist(),
genotype_matrix = genotype_matrix.tolist(),
@@ -732,7 +760,7 @@ class MarkerRegression(object):
Redis.expire(key, 60*60)
print("before printing command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND + ' --key {} --species {}'.format(key,
"other")
print("command is:", command)
print("after printing command")
@@ -745,7 +773,7 @@ class MarkerRegression(object):
json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
results = json.loads(json_results[1])
p_values = [float(result) for result in results['p_values']]
- print("p_values:", p_values)
+ print("p_values:", p_values[:10])
#p_values = self.trim_results(p_values)
t_stats = results['t_stats']
@@ -806,7 +834,7 @@ class MarkerRegression(object):
print("Before creating the command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,
"human")
print("command is:", command)
diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py
index fa8c1aab..042cb8df 100755
--- a/wqflask/wqflask/model.py
+++ b/wqflask/wqflask/model.py
@@ -194,4 +194,4 @@ def display_collapsible(number):
def user_uuid():
"""Unique cookie for a user"""
user_uuid = request.cookies.get('user_uuid')
- \ No newline at end of file
+
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index f6b0e72d..b844c845 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,21 +1,37 @@
-# RELEASE NOTES
-
-## 0.50-gn2-pre1 release
-
-This is the first test release of multi-core pylmm into GN2. Both
-kinship calculation and GWAS have been made multi-threaded by
-introducing the Python multiprocessing module. Note that only
-run_other has been updated to use the new routines (so human is still
-handled the old way). I have taken care that we can still run both
-old-style and new-style LMM (through passing the 'new_code'
-boolean). This could be an option in the web server for users to
-select and test for any unexpected differences (of which there should
-be none, naturally ;).
-
-The current version can handle missing phenotypes, but as they are
-removed there is no way for GN2 to know what SNPs the P-values belong
-to. A future version will pass a SNP index to allow for missing
-phenotypes.
+# Genenetwork2/pylmm RELEASE NOTES
+
+## 0.51-gn2 (April 19, 2015)
+
+- Improved GN2 integration
+- Less matrix transposes
+- Able to run pylmm standalone without Redis again (still requires
+ the modules)
+
+## 0.50-gn2 (April 2nd, 2015)
+
+- Replaced the GN2 genotype normalization
+
+## 0.50-gn2-pre2 (March 18, 2015)
+
+- Added abstractions for progress meter and info/debug statements;
+ Redis perc_complete is now updated through a lambda
+
+## 0.50-gn2-pre1 (release, March 17, 2015)
+
+- This is the first test release of multi-core pylmm into GN2. Both
+ kinship calculation and GWAS have been made multi-threaded by
+ introducing the Python multiprocessing module. Note that only
+ run_other has been updated to use the new routines (so human is
+ still handled the old way). I have taken care that we can still run
+ both old-style and new-style LMM (through passing the 'new_code'
+ boolean). This could be an option in the web server for users to
+ select and test for any unexpected differences (of which there
+ should be none, naturally ;).
+
+- The current version can handle missing phenotypes, but as they are
+ removed there is no way for GN2 to know what SNPs the P-values
+ belong to. A future version will pass a SNP index to allow for
+ missing phenotypes.
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index c40c3221..f33c4e74 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -1 +1 @@
-PYLMM_VERSION="0.50-gn2-pre1"
+PYLMM_VERSION="0.51-gn2"
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
new file mode 100644
index 00000000..6c6c9f88
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
@@ -0,0 +1,44 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+class Bench(object):
+ entries = collections.OrderedDict()
+
+ def __init__(self, name=None):
+ self.name = name
+
+ def __enter__(self):
+ if self.name:
+ print("Starting benchmark: %s" % (self.name))
+ else:
+ print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+ self.start_time = time.time()
+
+ def __exit__(self, type, value, traceback):
+ if self.name:
+ name = self.name
+ else:
+ name = "That"
+
+ time_taken = time.time() - self.start_time
+ print(" %s took: %f seconds" % (name, (time_taken)))
+
+ if self.name:
+ Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken
+
+
+ @classmethod
+ def report(cls):
+ total_time = sum((time_taken for time_taken in cls.entries.itervalues()))
+ print("\nTiming report\n")
+ for name, time_taken in cls.entries.iteritems():
+ percent = int(round((time_taken/total_time) * 100))
+ print("[{}%] {}: {}".format(percent, name, time_taken))
+ print()
+
+ def reset(cls):
+ """Reset the entries"""
+ cls.entries = collections.OrderedDict()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 3b6b5d70..4312fed0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -1,5 +1,5 @@
-# This is a converter for common LMM formats, so as to keep complexity
-# outside the main routines.
+# This is a converter for common LMM formats, so as to keep file
+# reader complexity outside the main routines.
# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
#
@@ -31,6 +31,12 @@ python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--phen
Convert files for runlmm.py processing. Writes to stdout by default.
try --help for more information
+
+Examples:
+
+ python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno
+
+ python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno
"""
# if len(args) == 0:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
index 315fd824..49f32e3a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -37,14 +37,15 @@ def normalize(ind_g):
Run for every SNP list (for one individual) and return
normalized SNP genotype values with missing data filled in
"""
- g1 = np.copy(ind_g) # avoid side effects
- x = True - np.isnan(ind_g) # Matrix of True/False
- m = ind_g[x].mean() # Global mean value
- s = np.sqrt(ind_g[x].var()) # Global stddev
- g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data
- if s == 0:
- g1 = g1 - m # Subtract the mean
+ g = np.copy(ind_g) # copy to avoid side effects
+ missing = np.isnan(g)
+ values = g[True - missing]
+ mean = values.mean() # Global mean value
+ stddev = np.sqrt(values.var()) # Global stddev
+ g[missing] = mean # Plug-in mean values for missing data
+ if stddev == 0:
+ g = g - mean # Subtract the mean
else:
- g1 = (g1 - m) / s # Normalize the deviation
- return g1
+ g = (g - mean) / stddev # Normalize the deviation
+ return g
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
new file mode 100644
index 00000000..821195c8
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -0,0 +1,110 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None
+progress_current = None
+progress_prev_perc = None
+
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
+
+def progress(location, count, total):
+ global progress_location
+ global progress_prev_perc
+
+ perc = round(count*100.0/total)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ progress_location = location
+ progress_prev_perc = perc
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def fatal(msg):
+ logger.critical(msg)
+ raise Exception(msg)
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
+ fatal = fatal,
+ progress = progress,
+ mprint = mprint
+ )
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index b901c0e2..247a8729 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -20,7 +20,6 @@
import pdb
import time
import sys
-# from utility import temp_data
import lmm2
import os
@@ -32,16 +31,25 @@ from lmm2 import LMM2
import multiprocessing as mp # Multiprocessing is part of the Python stdlib
import Queue
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses
+except ImportError:
+ has_gn2=False
+ from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
def formatResult(id,beta,betaSD,ts,ps):
return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
- # print("COMPUTE SNP",j,snp_ids,"\n")
result = []
for snp_id in snp_ids:
snp,id = snp_id
x = snp.reshape((n,1)) # all the SNPs
- # print "X=",x
# if refit:
# L.fit(X=snp,REML=REML)
ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
@@ -51,33 +59,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
q = compute_snp.q
q.put([j,result])
return j
- # PS.append(ps)
- # TS.append(ts)
- # return len(result)
- # compute.q.put(result)
- # return None
def f_init(q):
compute_snp.q = q
def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
"""
- Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows)
+ GWAS. The G matrix should be n inds (cols) x m snps (rows)
"""
+ info("In gwas.gwas")
matrix_initialize()
cpu_num = mp.cpu_count()
numThreads = None # for now use all available threads
kfile2 = False
reml = restricted_max_likelihood
- sys.stderr.write(str(G.shape)+"\n")
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
- # print "***** GWAS: G",G.shape,G
- assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
+ info("%s SNPs",snps)
+ assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
# CREATE LMM object for association
# if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
@@ -85,19 +88,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
if not refit:
- if verbose: sys.stderr.write("Computing fit for null model\n")
+ info("Computing fit for null model")
lmm2.fit() # follow GN model in run_other
- if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma))
-
- # outFile = "test.out"
- # out = open(outFile,'w')
- out = sys.stderr
-
- def outputResult(id,beta,betaSD,ts,ps):
- out.write(formatResult(id,beta,betaSD,ts,ps))
- def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
- # printOutHead()
+ info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma))
+
res = []
# Set up the pool
@@ -106,26 +100,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
p = mp.Pool(numThreads, f_init, [q])
collect = []
- # Buffers for pvalues and t-stats
- # PS = []
- # TS = []
count = 0
job = 0
jobs_running = 0
+ jobs_completed = 0
for snp in G:
snp_id = (snp,'SNPID')
count += 1
if count % 1000 == 0:
job += 1
- if verbose:
- sys.stderr.write("Job %d At SNP %d\n" % (job,count))
+ debug("Job %d At SNP %d" % (job,count))
if numThreads == 1:
- print "Running on 1 THREAD"
+ debug("Running on 1 THREAD")
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
else:
p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
@@ -134,8 +126,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
while jobs_running > cpu_num:
try:
j,lst = q.get_nowait()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
jobs_running -= 1
except Queue.Empty:
@@ -150,24 +143,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
if numThreads==1 or count<1000 or len(collect)>0:
job += 1
- print "Collect final batch size %i job %i @%i: " % (len(collect), job, count)
+ debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count))
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
res.append((j,lst))
- print "count=",count," running=",jobs_running," collect=",len(collect)
+ debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect)))
for job in range(jobs_running):
j,lst = q.get(True,15) # time out
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
- print "Before sort",[res1[0] for res1 in res]
+ mprint("Before sort",[res1[0] for res1 in res])
res = sorted(res,key=lambda x: x[0])
- # if verbose:
- # print "res=",res[0][0:10]
- print "After sort",[res1[0] for res1 in res]
- print [len(res1[1]) for res1 in res]
+ mprint("After sort",[res1[0] for res1 in res])
+ info([len(res1[1]) for res1 in res])
ts = [item[0] for j,res1 in res for item in res1]
ps = [item[1] for j,res1 in res for item in res1]
return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 0c43587e..1c157fd8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -28,17 +28,30 @@ import time
from optmatrix import matrix_initialize, matrixMultT
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
+except ImportError:
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
def kinship_full(G):
"""
Calculate the Kinship matrix using a full dot multiplication
"""
- print G.shape
+ # mprint("kinship_full G",G)
m = G.shape[0] # snps
n = G.shape[1] # inds
- sys.stderr.write(str(m)+" SNPs\n")
- assert m>n, "n should be larger than m (snps>inds)"
- m = np.dot(G.T,G)
+ info("%d SNPs",m)
+ assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n)
+ # m = np.dot(G.T,G)
+ m = matrixMultT(G.T)
m = m/G.shape[0]
+ # mprint("kinship_full K",m)
return m
def compute_W(job,G,n,snps,compute_size):
@@ -74,46 +87,38 @@ def f_init(q):
# Calculate the kinship matrix from G (SNPs as rows!), returns K
#
-def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
- numThreads = None
- if numThreads:
- numThreads = int(numThreads)
+def kinship(G,computeSize=1000,numThreads=None,useBLAS=False):
+
matrix_initialize(useBLAS)
-
- sys.stderr.write(str(G.shape)+"\n")
+
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
- assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
+ info("%i SNPs" % (m))
+ assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
q = mp.Queue()
p = mp.Pool(numThreads, f_init, [q])
cpu_num = mp.cpu_count()
- print "CPU cores:",cpu_num
- print snps,computeSize
+ info("CPU cores: %i" % cpu_num)
iterations = snps/computeSize+1
- # if testing:
- # iterations = 8
- # jobs = range(0,8) # range(0,iterations)
results = []
-
K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals
completed = 0
for job in range(iterations):
- if verbose:
- sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize)))
+ info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize)))
W = compute_W(job,G,n,snps,computeSize)
if numThreads == 1:
# Single-core
compute_matrixMult(job,W,q)
j,x = q.get()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ progress("kinship",j,iterations)
K_j = x
- # print j,K_j[:,0]
K = K + K_j
else:
# Multi-core
@@ -123,52 +128,38 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
time.sleep(0.1)
try:
j,x = q.get_nowait()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
except Queue.Empty:
pass
if numThreads == None or numThreads > 1:
- # results contains the growing result set
for job in range(len(results)-completed):
j,x = q.get(True,15)
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
K = K / float(snps)
-
- # outFile = 'runtest.kin'
- # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
- # np.savetxt(outFile,K)
-
- # if saveKvaKve:
- # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
- # Kva,Kve = linalg.eigh(K)
- # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
- # np.savetxt(outFile+".kva",Kva)
- # np.savetxt(outFile+".kve",Kve)
return K
-def kvakve(K, verbose=True):
+def kvakve(K):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
"""
- if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-
+ info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
Kva,Kve = linalg.eigh(K)
- if verbose:
- print("Kva is: ", Kva.shape, Kva)
- print("Kve is: ", Kve.shape, Kve)
+ mprint("Kva",Kva)
+ mprint("Kve",Kve)
- if sum(Kva < 1e-6):
- if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0)))
+ if sum(Kva < 0):
+ info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0)))
Kva[Kva < 1e-6] = 1e-6
return Kva,Kve
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 53689071..2a0c7fdc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,49 +19,59 @@ from __future__ import absolute_import, print_function, division
import sys
import time
-import argparse
import uuid
import numpy as np
from scipy import linalg
from scipy import optimize
from scipy import stats
-import pdb
+# import pdb
-import simplejson as json
-
-import gzip
-import zlib
+# import gzip
+# import zlib
import datetime
-import cPickle as pickle
-import simplejson as json
-
+# import cPickle as pickle
from pprint import pformat as pf
-from redis import Redis
-Redis = Redis()
-
-import sys
-sys.path.append("/home/zas1024/gene/wqflask/")
-
-has_gn2=True
-
-from utility.benchmark import Bench
-from utility import temp_data
-
-sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/")
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+ sys.path.insert(1,cwd)
+# pylmm imports
from kinship import kinship, kinship_full, kvakve
import genotype
import phenotype
import gwas
+from benchmark import Bench
+# The following imports are for exchanging data with the webserver
+import simplejson as json
+from redis import Redis
+Redis = Redis()
+import temp_data
+
+has_gn2=None
+
+# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm file is "+__file__+"\n")
+
+# ---- A trick to decide on the environment:
try:
- from wqflask.my_pylmm.pyLMM import chunks
+ sys.stderr.write("INFO: lmm try loading module\n")
+ import utility.formatting # this is never used, just to check the environment
+ sys.stderr.write("INFO: This is a genenetwork2 environment\n")
+ from gn2 import uses, progress_set_func
+ has_gn2=True
except ImportError:
- print("WARNING: Standalone version missing the Genenetwork2 environment\n")
+ # Failed to load gn2
has_gn2=False
- pass
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+ sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
#np.seterr('raise')
@@ -76,8 +86,7 @@ def run_human(pheno_vector,
covariate_matrix,
plink_input_file,
kinship_matrix,
- refit=False,
- tempdata=None):
+ refit=False):
v = np.isnan(pheno_vector)
keep = True - v
@@ -169,10 +178,7 @@ def run_human(pheno_vector,
#if count > 1000:
# break
count += 1
-
- percent_complete = (float(count) / total_snps) * 100
- #print("percent_complete: ", percent_complete)
- tempdata.store("percent_complete", percent_complete)
+ progress("human",count,total_snps)
#with Bench("actual association"):
ps, ts = human_association(snp,
@@ -260,23 +266,19 @@ def human_association(snp,
def run_other_old(pheno_vector,
genotype_matrix,
restricted_max_likelihood=True,
- refit=False,
- tempdata=None # <---- can not be None
- ):
+ refit=False):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
- temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -292,27 +294,22 @@ def run_other_old(pheno_vector,
with Bench("Doing GWAS"):
t_stats, p_values = GWAS(pheno_vector,
- genotype_matrix,
+ genotype_matrix.T,
kinship_matrix,
restricted_max_likelihood=True,
- refit=False,
- temp_data=tempdata)
+ refit=False)
Bench().report()
return p_values, t_stats
-def run_other_new(pheno_vector,
- genotype_matrix,
+def run_other_new(n,m,pheno_vector,
+ geno,
restricted_max_likelihood=True,
- refit=False,
- tempdata=None # <---- can not be None
- ):
+ refit=False):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
- temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
@@ -320,8 +317,7 @@ def run_other_new(pheno_vector,
print("REML=",restricted_max_likelihood," REFIT=",refit)
# Adjust phenotypes
- Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
- print("Removed missing phenotypes",Y.shape)
+ n,Y,keep = phenotype.remove_missing_new(n,pheno_vector)
# if options.maf_normalization:
# G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
@@ -329,8 +325,9 @@ def run_other_new(pheno_vector,
# if not options.skip_genotype_normalization:
# G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+ geno = geno[:,keep]
with Bench("Calculate Kinship"):
- K,G = calculate_kinship(G, tempdata)
+ K,G = calculate_kinship_new(geno)
print("kinship_matrix: ", pf(K))
print("kinship_matrix.shape: ", pf(K.shape))
@@ -345,7 +342,7 @@ def run_other_new(pheno_vector,
with Bench("Doing GWAS"):
t_stats, p_values = gwas.gwas(Y,
- G.T,
+ G,
K,
restricted_max_likelihood=True,
refit=False,verbose=True)
@@ -385,18 +382,30 @@ def matrixMult(A,B):
return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
-
-def calculate_kinship_new(genotype_matrix, temp_data=None):
+def calculate_kinship_new(genotype_matrix):
+ """
+ Call the new kinship calculation where genotype_matrix contains
+ inds (columns) by snps (rows).
+ """
+ assert type(genotype_matrix) is np.ndarray
+ info("call genotype.normalize")
+ G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix)
+ mprint("G",genotype_matrix)
+ info("call calculate_kinship_new")
+ return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME)
+
+def calculate_kinship_iter(geno):
"""
Call the new kinship calculation where genotype_matrix contains
inds (columns) by snps (rows).
"""
- print("call genotype.normalize")
+ assert type(genotype_matrix) is iter
+ info("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
- print("call calculate_kinship_new")
- return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
+ info("call calculate_kinship_new")
+ return kinship(G)
-def calculate_kinship_old(genotype_matrix, temp_data=None):
+def calculate_kinship_old(genotype_matrix):
"""
genotype_matrix is an n x m matrix encoding SNP minor alleles.
@@ -404,13 +413,15 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
normalizes the resulting vectors and returns the RRM matrix.
"""
- print("call calculate_kinship_old")
+ info("call calculate_kinship_old")
+ fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog")
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- print("genotype 2D matrix n (inds) is:", n)
- print("genotype 2D matrix m (snps) is:", m)
+ info("genotype 2D matrix n (inds) is: %d" % (n))
+ info("genotype 2D matrix m (snps) is: %d" % (m))
assert m>n, "n should be larger than m (snps>inds)"
keep = []
+ mprint("G (before old normalize)",genotype_matrix)
for counter in range(m):
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
#Checks if any values in column are not numbers
@@ -429,17 +440,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
continue
keep.append(counter)
genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
-
- percent_complete = int(round((counter/m)*45))
- if temp_data != None:
- temp_data.store("percent_complete", percent_complete)
+ progress('kinship_old normalize genotype',counter,m)
genotype_matrix = genotype_matrix[:,keep]
- print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
+ mprint("G (after old normalize)",genotype_matrix.T)
kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
return kinship_matrix,genotype_matrix
-
-calculate_kinship = calculate_kinship_new # alias
+ # return kinship_full(genotype_matrix.T),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
@@ -465,9 +472,9 @@ def GWAS(pheno_vector,
refit - refit the variance component for each SNP
"""
- if kinship_eigen_vals == None:
+ if kinship_eigen_vals is None:
kinship_eigen_vals = []
- if kinship_eigen_vectors == None:
+ if kinship_eigen_vectors is None:
kinship_eigen_vectors = []
n = genotype_matrix.shape[0]
@@ -537,9 +544,8 @@ def GWAS(pheno_vector,
lmm_ob.fit(X=x)
ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)
- percent_complete = 45 + int(round((counter/m)*55))
- temp_data.store("percent_complete", percent_complete)
-
+ progress("gwas_old",counter,m)
+
p_values.append(ps)
t_statistics.append(ts)
@@ -572,7 +578,7 @@ class LMM:
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
"""
- if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+ if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
#x = Y != -9
@@ -665,7 +671,7 @@ class LMM:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None:
+ if X is None:
X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -804,48 +810,96 @@ class LMM:
pl.ylabel("Probability of data")
pl.title(title)
-
-def gn2_redis(key,species,new_code=True):
+def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True):
"""
- Invoke pylmm using Redis as a container. new_code runs the new
- version
+ Invoke pylmm using genotype as a matrix or as a (SNP) iterator.
"""
- json_params = Redis.get(key)
-
- params = json.loads(json_params)
-
- tempdata = temp_data.TempData(params['temp_uuid'])
-
-
- print('pheno', np.array(params['pheno_vector']))
+ info("run_gwas")
+ print('pheno', y)
if species == "human" :
- print('kinship', np.array(params['kinship_matrix']))
- ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
- covariate_matrix = np.array(params['covariate_matrix']),
- plink_input_file = params['input_file_name'],
- kinship_matrix = np.array(params['kinship_matrix']),
- refit = params['refit'],
- tempdata = tempdata)
+ print('kinship', k )
+ ps, ts = run_human(pheno_vector = y,
+ covariate_matrix = cov,
+ plink_input_file = inputfn,
+ kinship_matrix = k,
+ refit = refit)
else:
- geno = np.array(params['genotype_matrix'])
print('geno', geno.shape, geno)
if new_code:
- ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']),
- genotype_matrix = geno,
- restricted_max_likelihood = params['restricted_max_likelihood'],
- refit = params['refit'],
- tempdata = tempdata)
+ ps, ts = run_other_new(n,m,pheno_vector = y,
+ geno = geno,
+ restricted_max_likelihood = reml,
+ refit = refit)
else:
- ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']),
+ ps, ts = run_other_old(pheno_vector = y,
genotype_matrix = geno,
- restricted_max_likelihood = params['restricted_max_likelihood'],
- refit = params['refit'],
- tempdata = tempdata)
+ restricted_max_likelihood = reml,
+ refit = refit)
+ return ps,ts
+
+def gwas_with_redis(key,species,new_code=True):
+ """
+ Invoke pylmm using Redis as a container. new_code runs the new
+ version. All the Redis code goes here!
+ """
+ json_params = Redis.get(key)
+
+ params = json.loads(json_params)
+
+ tempdata = temp_data.TempData(params['temp_uuid'])
+ def update_tempdata(loc,i,total):
+ """
+ This is the single method that updates Redis for percentage complete!
+ """
+ tempdata.store("percent_complete",round(i*100.0/total))
+ debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
+ progress_set_func(update_tempdata)
+
+ def narray(t):
+ info("Type is "+t)
+ v = params.get(t)
+ if v is not None:
+ # Note input values can be array of string or float
+ v1 = [x if x != 'NA' else 'nan' for x in v]
+ v = np.array(v1).astype(np.float)
+ return v
+
+ def marray(t):
+ info("Type is "+t)
+ v = params.get(t)
+ if v is not None:
+ m = []
+ for r in v:
+ # Note input values can be array of string or float
+ r1 = [x if x != 'NA' else 'nan' for x in r]
+ m.append(np.array(r1).astype(np.float))
+ return np.array(m)
+ return np.array(v)
+
+ def marrayT(t):
+ m = marray(t)
+ if m is not None:
+ return m.T
+ return m
+
+ # We are transposing before we enter run_gwas - this should happen on the webserver
+ # side (or when reading data from file)
+ k = marray('kinship_matrix')
+ g = marrayT('genotype_matrix')
+ mprint("geno",g)
+ y = narray('pheno_vector')
+ n = len(y)
+ m = params.get('num_genotypes')
+ if m is None:
+ m = g.shape[0]
+ info("m=%d,n=%d" % (m,n))
+ ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code)
results_key = "pylmm:results:" + params['temp_uuid']
+ # fatal(results_key)
json_results = json.dumps(dict(p_values = ps,
t_stats = ts))
@@ -854,28 +908,56 @@ def gn2_redis(key,species,new_code=True):
Redis.expire(results_key, 60*60)
return ps, ts
-# This is the main function used by Genenetwork2 (with environment)
-def gn2_main():
- parser = argparse.ArgumentParser(description='Run pyLMM')
- parser.add_argument('-k', '--key')
- parser.add_argument('-s', '--species')
-
- opts = parser.parse_args()
-
- key = opts.key
- species = opts.species
+def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
+ """
+ This function emulates current GN2 behaviour by pre-loading Redis (note the input
+ genotype is transposed to emulate GN2 (FIXME!)
+ """
+ info("Loading Redis from parsed data")
+ if kinship == None:
+ k = None
+ else:
+ k = kinship.tolist()
+ params = dict(pheno_vector = pheno.tolist(),
+ genotype_matrix = geno.T.tolist(),
+ num_genotypes = geno.shape[0],
+ kinship_matrix = k,
+ covariate_matrix = None,
+ input_file_name = None,
+ restricted_max_likelihood = True,
+ refit = False,
+ temp_uuid = "testrun_temp_uuid",
+
+ # meta data
+ timestamp = datetime.datetime.now().isoformat())
+
+ json_params = json.dumps(params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
- gn2_redis(key,species)
+ return gwas_with_redis(key,species,new_code)
-def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
- print("Loading Redis from parsed data")
+def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
+ """
+ This function emulates GN2 behaviour by pre-loading Redis with
+ a SNP iterator, for this it sets a key for every genotype (SNP)
+ """
+ print("Loading Redis using a SNP iterator")
+ for i,genotypes in enumerate(geno_iterator):
+ gkey = key+'_geno_'+str(i)
+ Redis.set(gkey, genotypes)
+ Redis.expire(gkey, 60*60)
+
if kinship == None:
k = None
else:
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
- genotype_matrix = geno.tolist(),
- kinship_matrix= k,
+ genotype_matrix = "iterator",
+ num_genotypes = i,
+ kinship_matrix = k,
+ covariate_matrix = None,
+ input_file_name = None,
restricted_max_likelihood = True,
refit = False,
temp_uuid = "testrun_temp_uuid",
@@ -887,14 +969,27 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
json_params = json.dumps(params)
Redis.set(key, json_params)
Redis.expire(key, 60*60)
+ return gwas_with_redis(key,species)
- return gn2_redis(key,species,new_code)
+# This is the main function used by Genenetwork2 (with environment)
+#
+# Note that this calling route will become OBSOLETE (we should use runlmm.py
+# instead)
+def gn2_main():
+ import argparse
+ parser = argparse.ArgumentParser(description='Run pyLMM')
+ parser.add_argument('-k', '--key')
+ parser.add_argument('-s', '--species')
+ opts = parser.parse_args()
+
+ key = opts.key
+ species = opts.species
+
+ gwas_with_redis(key,species)
+
+
if __name__ == '__main__':
print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
- if has_gn2:
- gn2_main()
- else:
- print("Run from runlmm.py instead")
-
+ gn2_main()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d4b3ac82..d871d8d2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,6 +24,25 @@ from scipy import optimize
from optmatrix import matrixMult
import kinship
+sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n")
+
+# ---- A trick to decide on the environment:
+try:
+ sys.stderr.write("INFO: lmm2 try loading module\n")
+ import utility.formatting # this is never used, just to check the environment
+ sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n")
+ from gn2 import uses, progress_set_func
+except ImportError:
+ # Failed to load gn2
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+ sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
def calculateKinship(W,center=False):
"""
W is an n x m matrix encoding SNP minor alleles.
@@ -75,7 +94,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
print("genotype matrix n is:", n)
print("genotype matrix m is:", m)
- if X0 == None:
+ if X0 is None:
X0 = np.ones((n,1))
# Remove missing values in Y and adjust associated parameters
@@ -139,31 +158,35 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
class LMM2:
- """
- This is a simple version of EMMA/fastLMM.
- The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K)
- and to optimize this model by finding the maximum-likelihood estimates for the model parameters.
- There are three model parameters: heritability (h), covariate coefficients (beta) and the total
- phenotypic variance (sigma).
- Heritability as defined here is the proportion of the total variance (sigma) that is attributed to
- the kinship matrix.
-
- For simplicity, we assume that everything being input is a numpy array.
- If this is not the case, the module may throw an error as conversion from list to numpy array
- is not done consistently.
+ """This is a simple version of EMMA/fastLMM.
+
+ The main purpose of this module is to take a phenotype vector (Y),
+ a set of covariates (X) and a kinship matrix (K) and to optimize
+ this model by finding the maximum-likelihood estimates for the
+ model parameters. There are three model parameters: heritability
+ (h), covariate coefficients (beta) and the total phenotypic
+ variance (sigma). Heritability as defined here is the proportion
+ of the total variance (sigma) that is attributed to the kinship
+ matrix.
+
+ For simplicity, we assume that everything being input is a numpy
+ array. If this is not the case, the module may throw an error as
+ conversion from list to numpy array is not done consistently.
"""
def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False):
- """
- The constructor takes a phenotype vector or array Y of size n.
- It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
- If they are not provided, the constructor will calculate them.
- X0 is an optional covariate matrix of size n x q, where there are q covariates.
- When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+ """The constructor takes a phenotype vector or array Y of size n. It
+ takes a kinship matrix K of size n x n. Kva and Kve can be
+ computed as Kva,Kve = linalg.eigh(K) and cached. If they are
+ not provided, the constructor will calculate them. X0 is an
+ optional covariate matrix of size n x q, where there are q
+ covariates. When this parameter is not provided, the
+ constructor will set X0 to an n x 1 matrix of all ones to
+ represent a mean effect.
"""
- if X0 == None:
+ if X0 is None:
X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
@@ -250,7 +273,7 @@ class LMM2:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None: X = self.X0t
+ if X is None: X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
X = self.X0t_stack
@@ -306,7 +329,7 @@ class LMM2:
Given this optimum, the function computes the LL and associated ML solutions.
"""
- if X == None: X = self.X0t
+ if X is None: X = self.X0t
else:
#X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -330,7 +353,7 @@ class LMM2:
def association(self,X,h=None,stack=True,REML=True,returnBeta=False):
"""
Calculates association statitics for the SNPs encoded in the vector X of size n.
- If h == None, the optimal h stored in optH is used.
+ If h is None, the optimal h stored in optH is used.
"""
if False:
@@ -348,7 +371,7 @@ class LMM2:
self.X0t_stack[:,(self.q)] = m
X = self.X0t_stack
- if h == None: h = self.optH
+ if h is None: h = self.optH
L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
q = len(beta)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index 682ba371..7b652515 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -19,22 +19,47 @@
import sys
import numpy as np
-def remove_missing(y,g,verbose=False):
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
+except ImportError:
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
+def remove_missing(n,y,g):
"""
Remove missing data from matrices, make sure the genotype data has
individuals as rows
"""
- assert(y!=None)
- assert(y.shape[0] == g.shape[0])
+ assert(y is not None)
+ assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape)
y1 = y
g1 = g
v = np.isnan(y)
keep = True - v
if v.sum():
- if verbose:
- sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
+ info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
y1 = y[keep]
g1 = g[keep,:]
- return y1,g1,keep
+ n = y1.shape[0]
+ return n,y1,g1,keep
+
+def remove_missing_new(n,y):
+ """
+ Remove missing data. Returns new n,y,keep
+ """
+ assert(y is not None)
+ y1 = y
+ v = np.isnan(y)
+ keep = True - v
+ if v.sum():
+ info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum()))
+ y1 = y[keep]
+ n = y1.shape[0]
+ return n,y1,keep
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 324c4f2c..6b241cd6 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,10 +21,21 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_old
+
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+ sys.path.insert(1,cwd)
+
+# pylmm modules
+from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas
from kinship import kinship, kinship_full
import genotype
import phenotype
+from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
usage = """
python runlmm.py [options] command
@@ -98,44 +109,64 @@ if options.pheno:
y = tsvreader.pheno(options.pheno)
print y.shape
-if options.geno:
+if options.geno and cmd != 'iterator':
g = tsvreader.geno(options.geno)
print g.shape
-if cmd == 'redis_new':
- # The main difference between redis_new and redis is that missing
- # phenotypes are handled by the first
- Y = y
- G = g
- print "Original G",G.shape, "\n", G
-
- gt = G.T
- G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
+def check_results(ps,ts):
print np.array(ps)
print len(ps),sum(ps)
- # Test results
p1 = round(ps[0],4)
p2 = round(ps[-1],4)
- sys.stderr.write(options.geno+"\n")
if options.geno == 'data/small.geno':
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
+ info("Validating results for "+options.geno)
+ assert p1==0.7387, "p1=%f" % p1
+ assert p2==0.7387, "p2=%f" % p2
if options.geno == 'data/small_na.geno':
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
+ info("Validating results for "+options.geno)
+ assert p1==0.062, "p1=%f" % p1
+ assert p2==0.062, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
- # assert p1==0.8984, "p1=%f" % p1
- # assert p2==0.9621, "p2=%f" % p2
+ info("Validating results for "+options.geno)
assert round(sum(ps)) == 4070
assert len(ps) == 8000
+ info("Run completed")
+
+if y is not None:
+ n = y.shape[0]
+
+if cmd == 'run':
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ n = len(y)
+ m = g.shape[1]
+ ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP
+ check_results(ps,ts)
+elif cmd == 'iterator':
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ geno_iterator = tsvreader.geno_iter(options.geno)
+ ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator)
+ check_results(ps,ts)
+elif cmd == 'redis_new':
+ # The main difference between redis_new and redis is that missing
+ # phenotypes are handled by the first
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ Y = y
+ G = g
+ print "Original G",G.shape, "\n", G
+ # gt = G.T
+ # G = None
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True)
+ check_results(ps,ts)
elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
print "Original G",G.shape, "\n", G
- if y != None and options.remove_missing_phenotypes:
+ if y is not None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
+ n,Y,g,keep = phenotype.remove_missing(n,y,gnt)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
else:
@@ -148,30 +179,16 @@ elif cmd == 'redis':
g = None
gnt = None
- gt = G.T
- G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
- print np.array(ps)
- print len(ps),sum(ps)
- # Test results 4070.02346579
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
- sys.stderr.write(options.geno+"\n")
- if options.geno == 'data/small.geno':
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
- if options.geno == 'data/small_na.geno':
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
- if options.geno == 'data/test8000.geno':
- assert round(sum(ps)) == 4070
- assert len(ps) == 8000
+ # gt = G.T
+ # G = None
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False)
+ check_results(ps,ts)
elif cmd == 'kinship':
G = g
print "Original G",G.shape, "\n", G
if y != None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+ n,Y,g,keep = phenotype.remove_missing(n,y,g.T)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
if options.maf_normalization:
@@ -187,20 +204,20 @@ elif cmd == 'kinship':
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
- K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
+ K2,G = calculate_kinship_new(np.copy(G))
print "Genotype",G.shape, "\n", G
print "GN2 Kinship method",K2.shape,"\n",K2
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(G.T)
+ K3 = kinship(G)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
if options.geno == 'data/small.geno':
- assert k1==0.8, "k1=%f" % k1
- assert k2==0.7939, "k2=%f" % k2
- assert k3==0.7939, "k3=%f" % k3
+ assert k1==0.8333, "k1=%f" % k1
+ assert k2==0.9375, "k2=%f" % k2
+ assert k3==0.9375, "k3=%f" % k3
if options.geno == 'data/small_na.geno':
assert k1==0.8333, "k1=%f" % k1
assert k2==0.7172, "k2=%f" % k2
@@ -209,4 +226,4 @@ elif cmd == 'kinship':
assert k3==1.4352, "k3=%f" % k3
else:
- print "Doing nothing"
+ fatal("Doing nothing")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
new file mode 100644
index 00000000..40b2021d
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -0,0 +1,110 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None
+progress_current = None
+progress_prev_perc = None
+
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
+
+def progress(location, count, total):
+ global progress_location
+ global progress_prev_perc
+
+ perc = round(count*100.0/total)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ progress_location = location
+ progress_prev_perc = perc
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def fatal(msg):
+ logger.critical(msg)
+ raise Exception(msg)
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
+ fatal = fatal,
+ progress = progress,
+ mprint = mprint
+ )
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
new file mode 100644
index 00000000..004d45c6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
@@ -0,0 +1,25 @@
+from __future__ import print_function, division, absolute_import
+from redis import Redis
+
+import simplejson as json
+
+class TempData(object):
+
+ def __init__(self, temp_uuid):
+ self.temp_uuid = temp_uuid
+ self.redis = Redis()
+ self.key = "tempdata:{}".format(self.temp_uuid)
+
+ def store(self, field, value):
+ self.redis.hset(self.key, field, value)
+ self.redis.expire(self.key, 60*15) # Expire in 15 minutes
+
+ def get_all(self):
+ return self.redis.hgetall(self.key)
+
+
+if __name__ == "__main__":
+ redis = Redis()
+ for key in redis.keys():
+ for field in redis.hkeys(key):
+ print("{}.{}={}".format(key, field, redis.hget(key, field)))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..66b34ee2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -56,7 +56,8 @@ def geno(fn):
print fn
with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ line = tsvin.readline().strip()
+ assert line == "# Genotype format version 1.0", line
tsvin.readline()
tsvin.readline()
tsvin.readline()
@@ -74,3 +75,48 @@ def geno(fn):
G = np.array(G1)
return G
+def geno(fn):
+ G1 = []
+ for id,values in geno_iter(fn):
+ G1.append(values) # <--- slow
+ G = np.array(G1)
+ return G
+
+def geno_callback(fn,func):
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ func(id,gs2)
+
+def geno_iter(fn):
+ """
+ Yield a tuple of snpid and values
+ """
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ yield (id,gs2)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
index a87d3537..881ea74d 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee
@@ -79,8 +79,8 @@ open_mapping_results = (data) ->
$.colorbox(
html: data
href: "#mapping_results_holder"
- height: "80%"
- width: "80%"
+ height: "90%"
+ width: "90%"
)
showalert = (message,alerttype) ->
@@ -154,8 +154,8 @@ $("#plink_compute").click(() =>
$("#static_progress_bar_container").modal()
url = "/marker_regression"
$('input[name=method]').val("plink")
- $('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val())
- $('input[name=suggestive]').val($('input[name=suggestive_plink]').val())
+ #$('input[name=mapping_display_all]').val($('input[name=display_all_plink]').val())
+ #$('input[name=suggestive]').val($('input[name=suggestive_plink]').val())
$('input[name=maf]').val($('input[name=maf_plink]').val())
form_data = $('#trait_data_form').serialize()
console.log("form_data is:", form_data)
@@ -164,11 +164,12 @@ $("#plink_compute").click(() =>
)
$("#gemma_compute").click(() =>
+ console.log("RUNNING GEMMA")
$("#static_progress_bar_container").modal()
url = "/marker_regression"
$('input[name=method]').val("gemma")
- $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val())
- $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val())
+ #$('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val())
+ #$('input[name=suggestive]').val($('input[name=suggestive_gemma]').val())
$('input[name=maf]').val($('input[name=maf_gemma]').val())
form_data = $('#trait_data_form').serialize()
console.log("form_data is:", form_data)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
index c8988cdc..1779df4b 100755
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
@@ -99,8 +99,8 @@ open_mapping_results = function(data) {
return $.colorbox({
html: data,
href: "#mapping_results_holder",
- height: "80%",
- width: "80%"
+ height: "90%",
+ width: "90%"
});
};
@@ -187,11 +187,10 @@ $("#plink_compute").click((function(_this) {
$("#gemma_compute").click((function(_this) {
return function() {
var form_data, url;
+ console.log("RUNNING GEMMA");
$("#static_progress_bar_container").modal();
url = "/marker_regression";
$('input[name=method]').val("gemma");
- $('input[name=mapping_display_all]').val($('input[name=display_all_gemma]').val());
- $('input[name=suggestive]').val($('input[name=suggestive_gemma]').val());
$('input[name=maf]').val($('input[name=maf_gemma]').val());
form_data = $('#trait_data_form').serialize();
console.log("form_data is:", form_data);
diff --git a/wqflask/wqflask/templates/pair_scan_results.html b/wqflask/wqflask/templates/pair_scan_results.html
index 869dabed..f46d7cbf 100644
--- a/wqflask/wqflask/templates/pair_scan_results.html
+++ b/wqflask/wqflask/templates/pair_scan_results.html
@@ -33,6 +33,28 @@
<h2>
Results
</h2>
+ <table cellpadding="0" cellspacing="0" border="0" id="pair_scan_results" class="table table-hover table-striped table-bordered">
+ <thead>
+ <tr>
+ <td>Index</td>
+ <td>Locus</td>
+ <td>Chr 1</td>
+ <td>Mb</td>
+ <td>Chr 2</td>
+ </tr>
+ </thead>
+ <tbody>
+ {% for marker in pair_scan_results %}
+ <tr>
+ <td>{{loop.index}}</td>
+ <td>{{marker.name}}</td>
+ <td>{{marker.chr1}}</td>
+ <td>{{marker.Mb}}</td>
+ <td>{{marker.chr2}}</td>
+ </tr>
+ {% endfor %}
+ </tbody>
+ </table>
</div>
</div>
diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html
index 731f6fbd..c7c2a62f 100755
--- a/wqflask/wqflask/templates/search_result_page.html
+++ b/wqflask/wqflask/templates/search_result_page.html
@@ -42,7 +42,7 @@
<button class="btn btn-default" id="deselect_all"><span class="glyphicon glyphicon-remove"></span> Deselect All</button>
<button class="btn btn-default" id="invert"><span class="glyphicon glyphicon-resize-vertical"></span> Invert</button>
<button class="btn btn-default" id="add"><span class="glyphicon glyphicon-plus-sign"></span> Add</button>
- <button class="btn btn-primary pull-right"><span class="glyphicon glyphicon-download"></span> Download Table</button>
+ <button class="btn btn-primary"><span class="glyphicon glyphicon-download"></span> Download Table</button>
<br />
<br />
<table class="table table-hover table-striped" id='trait_table'>
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index 426a5c5d..27504e51 100755
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -217,16 +217,16 @@
<div style="padding: 20px" class="form-horizontal">
<div class="mapping_method_fields form-group">
<label for="maf_plink" class="col-xs-2 control-label">Minor allele threshold</label>
- <div style="margin-left: 20px;" class="col-xs-1 controls">
+ <div style="margin-left: 20px;" class="col-xs-2 controls">
<input name="maf_plink" value="0.01" type="text" class="form-control">
</div>
</div>
</div>
<div class="form-group">
- <label for="gemma_compute" class="col-xs-1 control-label"></label>
+ <label for="plink_compute" class="col-xs-1 control-label"></label>
<div style="margin-left:20px;" class="col-xs-4 controls">
- <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
+ <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
Compute
</button>
</div>
@@ -237,16 +237,16 @@
<div style="padding: 20px" class="form-horizontal">
<div class="mapping_method_fields form-group">
<label for="maf_gemma" class="col-xs-2 control-label">Minor allele threshold</label>
- <div style="margin-left: 20px;" class="col-xs-1 controls">
+ <div style="margin-left: 20px;" class="col-xs-2 controls">
<input name="maf_gemma" value="0.01" type="text" class="form-control">
</div>
</div>
</div>
<div class="form-group">
- <label for="plink_compute" class="col-xs-1 control-label"></label>
+ <label for="gemma_compute" class="col-xs-1 control-label"></label>
<div style="margin-left:20px;" class="col-xs-4 controls">
- <button id="plink_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
+ <button id="gemma_compute" class="btn submit_special btn-primary" data-url="/marker_regression" title="Compute Marker Regression">
Compute
</button>
</div>