aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2017-09-22 16:08:57 +0000
committerzsloan2017-09-22 16:08:57 +0000
commitf2c29437a3c76b28d434f87e5b1f5cb08b9b2c6c (patch)
treecf5e69a4b54d5846431562a4b4f04d8ebe60e24f
parent873a7c1b6312bee324ae25d550af15e9e8964a01 (diff)
parent6c0e665d86c501b4a1640e067d46ecb61a3ae1bf (diff)
downloadgenenetwork2-f2c29437a3c76b28d434f87e5b1f5cb08b9b2c6c.tar.gz
Merge branch 'testing' of https://github.com/genenetwork/genenetwork2
-rwxr-xr-xbin/genenetwork21
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py141
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression.py3
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression_gn1.py4
-rw-r--r--wqflask/wqflask/templates/marker_regression_gn1.html1
-rw-r--r--wqflask/wqflask/templates/show_trait_mapping_tools.html16
-rw-r--r--wqflask/wqflask/views.py2
7 files changed, 129 insertions, 39 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index e07a4e32..5e791885 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -68,6 +68,7 @@ else
export PATH=$GN2_PROFILE/bin:$PATH
export PYTHONPATH=$GN2_PROFILE/lib/python2.7/site-packages
export R_LIBS_SITE=$GN2_PROFILE/site-library
+ export GEM_PATH=$GN2_PROFILE/lib/ruby/gems/2.4.0
export JS_GUIX_PATH=$GN2_PROFILE/share/genenetwork2/javascript
export GUIX_GTK3_PATH="$GN2_PROFILE/lib/gtk-3.0"
export GI_TYPELIB_PATH="$GN2_PROFILE/lib/girepository-1.0"
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 31e94266..f23c390e 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -1,14 +1,14 @@
-import os, math
+import os, math, string, random, json
from base import webqtlConfig
from base.trait import GeneralTrait
from base.data_set import create_dataset
-from utility.tools import flat_files, GEMMA_COMMAND
+from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND
import utility.logger
logger = utility.logger.getLogger(__name__ )
-def run_gemma(this_dataset, samples, vals, covariates, method):
+def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
"""Generates p-values for each marker using GEMMA"""
print("INSIDE GEMMA_MAPPING")
@@ -18,62 +18,88 @@ def run_gemma(this_dataset, samples, vals, covariates, method):
if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)):
open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name), "w+")
- logger.debug("COVARIATES_GEMMA:", covariates)
+ this_chromosomes = this_dataset.species.chromosomes.chromosomes
+ chr_list_string = ""
+ for i in range(len(this_chromosomes)):
+ if i < (len(this_chromosomes) - 1):
+ chr_list_string += this_chromosomes[i+1].name + ","
+ else:
+ chr_list_string += this_chromosomes[i+1].name
if covariates != "":
gen_covariates_file(this_dataset, covariates)
- if method == "gemma":
- #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -o %s_output' % (flat_files('mapping'),
- gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'),
- this_dataset.group.name,
- flat_files('mapping'),
- this_dataset.group.name,
- flat_files('mapping'),
- this_dataset.group.name,
- webqtlConfig.GENERATED_IMAGE_DIR,
- this_dataset.group.name)
- # use GEMMA_RUN in the next one, create a unique temp file
+
+ if method == "gemma":
+ gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('mapping'),
+ this_dataset.group.name,
+ flat_files('mapping'),
+ this_dataset.group.name)
+ if covariates != "":
+ gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'),
+ this_dataset.group.name,
+ webqtlConfig.GENERATED_IMAGE_DIR,
+ this_dataset.group.name)
else:
- logger.debug("FLAT FILES:", flat_files('mapping'))
- #gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -o %s_output' % (flat_files('genotype/bimbam'),
- gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('genotype/bimbam'),
+ #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('mapping'),
+ gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR,
+ this_dataset.group.name)
+ else:
+ if use_loco == "True":
+ k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
+ generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > /home/zas1024/tmp/gn2/%s.json' % (flat_files('genotype/bimbam'),
this_dataset.group.name,
flat_files('genotype/bimbam'),
this_dataset.group.name,
flat_files('genotype/bimbam'),
this_dataset.group.name,
+ k_output_filename)
+ logger.debug("k_command:" + generate_k_command)
+ os.system(generate_k_command)
+
+ gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input /home/zas1024/tmp/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (k_output_filename,
flat_files('genotype/bimbam'),
this_dataset.group.name,
- flat_files('mapping'),
- this_dataset.group.name,
- webqtlConfig.GENERATED_IMAGE_DIR,
- this_dataset.group.name)
- else:
- if method == "gemma":
- #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('mapping'),
- gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -outdir %s -o %s_output' % (flat_files('mapping'),
- this_dataset.group.name,
- flat_files('mapping'),
- this_dataset.group.name,
- webqtlConfig.GENERATED_IMAGE_DIR,
+ flat_files('genotype/bimbam'),
this_dataset.group.name)
+
+ gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
+ if covariates != "":
+ gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > /home/zas1024/tmp/gn2/%s.json' % (flat_files('mapping'),
+ this_dataset.group.name,
+ flat_files('genotype/bimbam'),
+ this_dataset.group.name,
+ gwa_output_filename)
+ else:
+ gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > /home/zas1024/tmp/gn2/%s.json' % (flat_files('genotype/bimbam'),
+ this_dataset.group.name,
+ gwa_output_filename)
+
else:
- #gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('genotype/bimbam'),
- gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -outdir %s -debug -o %s_output' % (flat_files('genotype/bimbam'),
+ gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('genotype/bimbam'),
this_dataset.group.name,
flat_files('genotype/bimbam'),
this_dataset.group.name,
flat_files('genotype/bimbam'),
this_dataset.group.name,
flat_files('genotype/bimbam'),
- this_dataset.group.name,
- webqtlConfig.GENERATED_IMAGE_DIR,
this_dataset.group.name)
- logger.debug("gemma_command:" + gemma_command)
+ if covariates != "":
+ gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'),
+ this_dataset.group.name,
+ webqtlConfig.GENERATED_IMAGE_DIR,
+ this_dataset.group.name)
+ else:
+ gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR,
+ this_dataset.group.name)
+
+ logger.debug("gemma_command:" + gemma_command)
os.system(gemma_command)
- marker_obs = parse_gemma_output(this_dataset)
+ if use_loco == "True":
+ marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
+ else:
+ marker_obs = parse_gemma_output(this_dataset)
return marker_obs
@@ -172,3 +198,46 @@ def parse_gemma_output(this_dataset):
p_values.append(float(line.split("\t")[10]))
return marker_obs
+
+def parse_loco_output(this_dataset, gwa_output_filename):
+
+ output_filelist = []
+ with open("/home/zas1024/tmp/gn2/" + gwa_output_filename + ".json") as data_file:
+ data = json.load(data_file)
+
+ files = data['files']
+ for file in files:
+ output_filelist.append(file[2])
+
+ included_markers = []
+ p_values = []
+ marker_obs = []
+ previous_chr = 0
+
+ for this_file in output_filelist:
+ #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file:
+ with open(this_file) as output_file:
+ for line in output_file:
+ if line.startswith("chr"):
+ continue
+ else:
+ marker = {}
+ marker['name'] = line.split("\t")[1]
+ if line.split("\t")[0] != "X" and line.split("\t")[0] != "X/Y":
+ marker['chr'] = int(line.split("\t")[0])
+ else:
+ marker['chr'] = line.split("\t")[0]
+ marker['Mb'] = float(line.split("\t")[2]) / 1000000
+ marker['p_value'] = float(line.split("\t")[10])
+ if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
+ marker['lod_score'] = 0
+ #marker['lrs_value'] = 0
+ else:
+ marker['lod_score'] = -math.log10(marker['p_value'])
+ #marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+ marker_obs.append(marker)
+
+ included_markers.append(line.split("\t")[1])
+ p_values.append(float(line.split("\t")[10]))
+
+ return marker_obs \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 55bbacac..24292c35 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -84,6 +84,7 @@ class MarkerRegression(object):
self.manhattan_plot = False
self.maf = start_vars['maf'] # Minor allele frequency
+ self.use_loco = start_vars['use_loco']
self.suggestive = ""
self.significant = ""
self.pair_scan = False # Initializing this since it is checked in views to determine which template to use
@@ -158,7 +159,7 @@ class MarkerRegression(object):
self.score_type = "-log(p)"
self.manhattan_plot = True
with Bench("Running GEMMA"):
- marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.mapping_method)
+ marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.mapping_method, self.use_loco)
results = marker_obs
elif self.mapping_method == "rqtl_plink":
results = self.run_rqtl_plink()
diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
index 93d75a03..687cfeb5 100644
--- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py
+++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
@@ -257,6 +257,10 @@ class MarkerRegression(object):
self.controlLocus = ""
if 'covariates' in start_vars.keys():
self.covariates = start_vars['covariates']
+ if 'maf' in start_vars.keys():
+ self.maf = start_vars['maf']
+ if 'use_loco' in start_vars.keys():
+ self.use_loco = start_vars['use_loco']
#try:
self.selectedChr = int(start_vars['selected_chr'])
diff --git a/wqflask/wqflask/templates/marker_regression_gn1.html b/wqflask/wqflask/templates/marker_regression_gn1.html
index c6c6bc23..5c457275 100644
--- a/wqflask/wqflask/templates/marker_regression_gn1.html
+++ b/wqflask/wqflask/templates/marker_regression_gn1.html
@@ -20,6 +20,7 @@
<input type="hidden" name="value:{{ sample }}" value="{{ vals[loop.index - 1] }}">
{% endfor %}
<input type="hidden" name="maf">
+ <input type="hidden" name="use_loco" value="{{ use_loco }}">
<input type="hidden" name="selected_chr" value="{{ selectedChr }}">
<input type="hidden" name="manhattan_plot" value="{{ manhattan_plot }}">
<input type="hidden" name="num_perm" value="{{ nperm }}">
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html
index c24a5853..eadc7481 100644
--- a/wqflask/wqflask/templates/show_trait_mapping_tools.html
+++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html
@@ -302,8 +302,20 @@
<input name="maf_gemma" value="0.01" type="text" class="form-control">
</div>
</div>
+ <div class="mapping_method_fields form-group">
+ <label class="col-xs-4 control-label">Use LOCO</label>
+ <div style="margin-left: 20px;" class="col-xs-4 controls">
+ <label class="radio-inline">
+ <input type="radio" name="use_loco" value="True">
+ Yes
+ </label>
+ <label class="radio-inline">
+ <input type="radio" name="use_loco" value="False" checked="">
+ No
+ </label>
+ </div>
+ </div>
</div>
-
<div style="padding-top: 5px; padding-bottom: 5px; padding-left: 20px;" class="form-horizontal">
<div class="mapping_method_fields form-group">
<button type="button" id="select_covariates" class="btn btn-default">
@@ -395,7 +407,7 @@
<dt>GEMMA</dt>
<dd>
GEMMA is software implementing the Genome-wide Efficient Mixed Model Association algorithm for a standard linear mixed model for genome-wide association studies (GWAS).<br>
- <font style="color: red;">Note: There currently exists an issue where GEMMA can't be run on traits from the same group simultaneously. If you recieve an error, please wait a few minutes and try again.</font>
+ Note: There currently exists an issue where GEMMA can't be run on traits from the same group simultaneously. If you receive an error, please wait a few minutes and try again.
</dd>
<dt>PLINK</dt>
<dd>PLINK is a free, open-source whole genome association analysis toolset.</dd>
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 2d4fd0f2..f340c954 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -516,6 +516,7 @@ def loading_page():
'LRSCheck',
'covariates',
'maf',
+ 'use_loco',
'manhattan_plot',
'control_marker',
'control_marker_db',
@@ -572,6 +573,7 @@ def marker_regression_page():
'LRSCheck',
'covariates',
'maf',
+ 'use_loco',
'manhattan_plot',
'control_marker',
'control_marker_db',