diff options
4 files changed, 64 insertions, 225 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 5242ec05..dc13afc3 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -8,7 +8,7 @@ from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMP import utility.logger logger = utility.logger.getLogger(__name__ ) -def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): +def run_gemma(this_dataset, samples, vals, covariates, use_loco): """Generates p-values for each marker using GEMMA""" if this_dataset.group.genofile != None: @@ -16,7 +16,7 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): else: genofile_name = this_dataset.group.name - gen_pheno_txt_file(this_dataset, genofile_name, vals, method) + gen_pheno_txt_file(this_dataset, genofile_name, vals) if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)): open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+") @@ -32,73 +32,58 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): if covariates != "": gen_covariates_file(this_dataset, covariates) - if method == "gemma_plink": - gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name) + if use_loco == "True": + k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + k_output_filename) + logger.debug("k_command:" + generate_k_command) + os.system(generate_k_command) + + gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR, + k_output_filename, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name) + + gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) + gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + gwa_output_filename) else: - #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 2 -maf 0.1 -o %s_output' % (flat_files('mapping'), - gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) + gemma_command += ' -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + gwa_output_filename) + else: - if use_loco == "True": - k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - k_output_filename) - logger.debug("k_command:" + generate_k_command) - os.system(generate_k_command) - - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR, - k_output_filename, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name) - - gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - gwa_output_filename) - else: - gemma_command += ' -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - gwa_output_filename) + gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name) + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + genofile_name) else: - gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name) - - if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - genofile_name) - else: - gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, - genofile_name) + gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, + genofile_name) logger.debug("gemma_command:" + gemma_command) os.system(gemma_command) @@ -110,31 +95,16 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): return marker_obs -def gen_pheno_txt_file(this_dataset, genofile_name, vals, method): +def gen_pheno_txt_file(this_dataset, genofile_name, vals): """Generates phenotype file for GEMMA""" - if method == "gemma_plink": - current_file_data = [] - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "r") as outfile: - for i, line in enumerate(outfile): - split_line = line.split() - current_file_data.append(split_line) - - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: - for i, line in enumerate(current_file_data): - if vals[i] == "x": - this_val = -9 - else: - this_val = vals[i] - outfile.write("0" + " " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") - else: - current_file_data = [] - with open("{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name), "w") as outfile: - for value in vals: - if value == "x": - outfile.write("NA\n") - else: - outfile.write(value + "\n") + current_file_data = [] + with open("{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name), "w") as outfile: + for value in vals: + if value == "x": + outfile.write("NA\n") + else: + outfile.write(value + "\n") def gen_covariates_file(this_dataset, covariates): covariate_list = covariates.split(",") diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index a3631462..033fa2a4 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -174,11 +174,11 @@ class MarkerRegression(object): self.genofile_string = start_vars['genofile'] self.dataset.group.genofile = self.genofile_string.split(":")[0] self.dataset.group.get_markers() - if self.mapping_method == "gemma" or self.mapping_method == "gemma_bimbam": + if self.mapping_method == "gemma": self.score_type = "-log(p)" self.manhattan_plot = True with Bench("Running GEMMA"): - marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.mapping_method, self.use_loco) + marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.use_loco) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() @@ -289,7 +289,7 @@ class MarkerRegression(object): export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type) with Bench("Trimming Markers for Figure"): - if len(self.qtl_results) > 20000: + if len(self.qtl_results) > 30000: self.qtl_results = trim_markers_for_figure(self.qtl_results) with Bench("Trimming Markers for Table"): diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index 4d98f5d8..81123de7 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -208,41 +208,10 @@ }; })(this)); - $("#plink_compute").on("click", (function(_this) { - return function() { - var form_data, url; - //$("#static_progress_bar_container").modal(); - url = "/loading"; - $('input[name=method]').val("plink"); - $('input[name=maf]').val($('input[name=maf_plink]').val()); - form_data = $('#trait_data_form').serialize(); - console.log("form_data is:", form_data); - return submit_special(url); - //return do_ajax_post(url, form_data); - }; - })(this)); - $("#gemma_compute").on("click", (function(_this) { return function() { var form_data, url; console.log("RUNNING GEMMA"); - //$("#static_progress_bar_container").modal(); - url = "/loading"; - $('input[name=method]').val("gemma"); - $('input[name=genofile]').val($('#genofile_gemma').val()); - $('input[name=maf]').val($('input[name=maf_gemma]').val()); - form_data = $('#trait_data_form').serialize(); - console.log("form_data is:", form_data); - return submit_special(url); - //return do_ajax_post(url, form_data); - }; - })(this)); - - $("#gemma_bimbam_compute").on("click", (function(_this) { - return function() { - var form_data, url; - console.log("RUNNING GEMMA"); - //$("#static_progress_bar_container").modal(); url = "/loading"; $('input[name=method]').val("gemma_bimbam"); $('input[name=num_perm]').val(0); @@ -251,7 +220,6 @@ form_data = $('#trait_data_form').serialize(); console.log("form_data is:", form_data); return submit_special(url); - //return do_ajax_post(url, form_data); }; })(this)); diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index d40a7bd6..3d31797a 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -35,7 +35,6 @@ </ul> <div class="tab-content"> - {% if dataset.group.mapping_id == "1" %} <div class="tab-pane active" id="gemma"> <div style="padding-top: 20px;" class="form-horizontal"> {% if genofiles and genofiles|length>0 %} @@ -88,13 +87,14 @@ <div class="mapping_method_fields form-group"> <label class="col-xs-3 control-label"></label> <div style="margin-left:20px;" class="col-xs-6"> - <button id="gemma_bimbam_compute" class="btn submit_special btn-success" data-url="/marker_regression" title="Compute Marker Regression"> + <button id="gemma_compute" class="btn submit_special btn-success" data-url="/marker_regression" title="Compute Marker Regression"> Compute </button> </div> </div> </div> </div> + {% if dataset.group.mapping_id == "1" %} <div class="tab-pane" id="interval_mapping"> <div style="margin-top: 20px" class="form-horizontal"> {% if genofiles and genofiles|length>0 %} @@ -164,8 +164,6 @@ </div> </div> --> - - <div class="mapping_method_fields form-group"> <label style="text-align: right;" class="col-xs-3 control-label">Marker<br>Regression</label> <div style="margin-left:20px;" class="col-xs-6 controls"> @@ -187,11 +185,9 @@ </button> </div> </div> - <!--<div id="alert_placeholder"></div>--> </div> </div> <div class="tab-pane" id="rqtl_geno"> - <div style="margin-top: 20px" class="form-horizontal"> {% if genofiles and genofiles|length>0 %} <div class="mapping_method_fields form-group"> @@ -320,117 +316,22 @@ </div> </div> {% endif %} - {% for mapping_method in dataset.group.mapping_names %} - {% if mapping_method == "GEMMA" %} - <div class="tab-pane active" id="gemma_plink"> - <div style="padding-top: 10px;" class="form-horizontal"> - {% if genofiles and genofiles|length>0 %} - <div class="mapping_method_fields form-group"> - <label style="text-align:right;" class="col-xs-3 control-label">Genotypes</label> - <div style="margin-left: 20px;" class="col-xs-4 controls"> - <select id="genofile_gemma" class="form-control"> - {% for item in genofiles %} - <option value="{{item['location']}}:{{item['title']}}">{{item['title']}}</option> - {% endfor %} - </select> - </div> - </div> - {% endif %} - <div class="mapping_method_fields form-group"> - <label style="text-align:right;" class="col-xs-3 control-label">MAF</label> - <div style="margin-left: 20px;" class="col-xs-4 controls"> - <input name="maf_gemma" value="0.01" type="text" class="form-control"> - </div> - </div> - <div class="mapping_method_fields form-group"> - <label style="text-align: right;" class="col-xs-3 control-label">Use LOCO</label> - <div style="margin-left:20px;" class="col-xs-6 controls"> - <label class="radio-inline"> - <input type="radio" name="use_loco" value="True" checked=""> - Yes - </label> - <label class="radio-inline"> - <input type="radio" name="use_loco" value="False"> - No - </label> - </div> - </div> - <div class="mapping_method_fields form-group"> - <label style="text-align: right;" class="col-xs-3 control-label">Covariates</label> - <div style="margin-left:20px;" class="col-xs-7"> - {% if g.user_session.user_ob and (g.user_session.user_ob.display_num_collections() == "") %} - No collections available. Please add traits to a collection to use them as covariates. - {% elif g.cookie_session.display_num_collections() == "" %} - No collections available. Please add traits to a collection to use them as covariates. - {% else %} - <div style="margin-bottom: 10px;" class="btn-group" role="group"> - <button type="button" id="select_covariates" class="btn btn-default">Select</button> - <button type="button" id="remove_covariates" class="btn btn-default">Remove</button> - </div> - <textarea rows="3" cols="20" readonly placeholder="No covariates selected" style="overflow-y: scroll; resize: none;" class="selected_covariates"></textarea> - {% endif %} - </div> - </div> - <div class="mapping_method_fields form-group"> - <label class="col-xs-3 control-label"></label> - <div style="margin-left:20px;" class="col-xs-6"> - <button id="gemma_compute" class="btn submit_special btn-success" data-url="/marker_regression" title="Compute Marker Regression"> - Compute - </button> - </div> - </div> - </div> - </div> - {% elif mapping_method == "PLINK" %} - <div class="tab-pane" id="plink"> - <div style="padding: 20px" class="form-horizontal"> - <div class="mapping_method_fields form-group"> - <label style="text-align: right;" class="col-xs-3 control-label">MAF</label> - <div style="margin-left: 20px;" class="col-xs-4 controls"> - <input name="maf_plink" value="0.01" type="text" class="form-control"> - </div> - </div> - </div> - - <div class="form-group"> - <label style="text-align: right;" class="col-xs-3 control-label"></label> - <div style="margin-left:20px;" class="col-xs-6 controls"> - <button id="plink_compute" class="btn submit_special btn-success" data-url="/marker_regression" title="Compute Marker Regression"> - Compute - </button> - </div> - </div> - </div> - {% endif %} - {% endfor %} </div> </div> </div> <div class="col-xs-5"> - {% if dataset.group.mapping_id == "1" %} <dl> <dt>GEMMA</dt> <dd>GEMMA is software implementing the Genome-wide Efficient Mixed Model Association algorithm for a standard linear mixed model for genome-wide association studies (GWAS).</dd> + {% if dataset.group.mapping_id == "1" %} <dt>Interval Mapping</dt> <dd>Interval mapping is a process in which the statistical significance of a hypothetical QTL is evaluated at regular points across a chromosome, even in the absence of explicit genotype data at those points.</dd> <dt>R/qtl</dt> <dd>R/qtl is an extensible, interactive environment for mapping quantitative trait loci (QTL) in experimental crosses.</dd> <dt>pyLMM</dt> <dd>pyLMM is a fast and lightweight linear mixed-model (LMM) solver for use in genome-wide association studies (GWAS).</dd> + {% endif %} </dl> - {% else %} - <dl> - {% if mapping_method == "GEMMA" %} - <dt>GEMMA</dt> - <dd> - GEMMA is software implementing the Genome-wide Efficient Mixed Model Association algorithm for a standard linear mixed model for genome-wide association studies (GWAS).<br> - </dd> - {% elif mapping_method == "PLINK" %} - <dt>PLINK</dt> - <dd>PLINK is a free, open-source whole genome association analysis toolset.</dd> - {% endif %} - </dl> - {% endif %} </div> <div id="mapping_result_holder_wrapper" style="display:none;"> <div id="mapping_result_holder"></div> |