From ff20eff6ad7e86f890f7de7fb3779e313d9b66ee Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 1 May 2018 19:59:57 +0000 Subject: Removed all code related to using GEMMA with PLINK input files --- wqflask/wqflask/marker_regression/gemma_mapping.py | 144 ++++++++------------- .../wqflask/marker_regression/marker_regression.py | 6 +- .../new/javascript/show_trait_mapping_tools.js | 32 ----- .../templates/show_trait_mapping_tools.html | 107 +-------------- 4 files changed, 64 insertions(+), 225 deletions(-) diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 5242ec05..dc13afc3 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -8,7 +8,7 @@ from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMP import utility.logger logger = utility.logger.getLogger(__name__ ) -def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): +def run_gemma(this_dataset, samples, vals, covariates, use_loco): """Generates p-values for each marker using GEMMA""" if this_dataset.group.genofile != None: @@ -16,7 +16,7 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): else: genofile_name = this_dataset.group.name - gen_pheno_txt_file(this_dataset, genofile_name, vals, method) + gen_pheno_txt_file(this_dataset, genofile_name, vals) if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)): open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name), "w+") @@ -32,73 +32,58 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): if covariates != "": gen_covariates_file(this_dataset, covariates) - if method == "gemma_plink": - gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name) + if use_loco == "True": + k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) + generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + k_output_filename) + logger.debug("k_command:" + generate_k_command) + os.system(generate_k_command) + + gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR, + k_output_filename, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name) + + gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) + gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + gwa_output_filename) else: - #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 2 -maf 0.1 -o %s_output' % (flat_files('mapping'), - gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) + gemma_command += ' -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + genofile_name, + TEMPDIR, + gwa_output_filename) + else: - if use_loco == "True": - k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - k_output_filename) - logger.debug("k_command:" + generate_k_command) - os.system(generate_k_command) - - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR, - k_output_filename, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name) - - gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - gwa_output_filename) - else: - gemma_command += ' -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), - genofile_name, - TEMPDIR, - gwa_output_filename) + gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name, + flat_files('genotype/bimbam'), + genofile_name) + if covariates != "": + gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + genofile_name) else: - gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name, - flat_files('genotype/bimbam'), - genofile_name) - - if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - genofile_name) - else: - gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, - genofile_name) + gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, + genofile_name) logger.debug("gemma_command:" + gemma_command) os.system(gemma_command) @@ -110,31 +95,16 @@ def run_gemma(this_dataset, samples, vals, covariates, method, use_loco): return marker_obs -def gen_pheno_txt_file(this_dataset, genofile_name, vals, method): +def gen_pheno_txt_file(this_dataset, genofile_name, vals): """Generates phenotype file for GEMMA""" - if method == "gemma_plink": - current_file_data = [] - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "r") as outfile: - for i, line in enumerate(outfile): - split_line = line.split() - current_file_data.append(split_line) - - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: - for i, line in enumerate(current_file_data): - if vals[i] == "x": - this_val = -9 - else: - this_val = vals[i] - outfile.write("0" + " " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") - else: - current_file_data = [] - with open("{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name), "w") as outfile: - for value in vals: - if value == "x": - outfile.write("NA\n") - else: - outfile.write(value + "\n") + current_file_data = [] + with open("{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), genofile_name), "w") as outfile: + for value in vals: + if value == "x": + outfile.write("NA\n") + else: + outfile.write(value + "\n") def gen_covariates_file(this_dataset, covariates): covariate_list = covariates.split(",") diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index a3631462..033fa2a4 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -174,11 +174,11 @@ class MarkerRegression(object): self.genofile_string = start_vars['genofile'] self.dataset.group.genofile = self.genofile_string.split(":")[0] self.dataset.group.get_markers() - if self.mapping_method == "gemma" or self.mapping_method == "gemma_bimbam": + if self.mapping_method == "gemma": self.score_type = "-log(p)" self.manhattan_plot = True with Bench("Running GEMMA"): - marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.mapping_method, self.use_loco) + marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.use_loco) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() @@ -289,7 +289,7 @@ class MarkerRegression(object): export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type) with Bench("Trimming Markers for Figure"): - if len(self.qtl_results) > 20000: + if len(self.qtl_results) > 30000: self.qtl_results = trim_markers_for_figure(self.qtl_results) with Bench("Trimming Markers for Table"): diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index 4d98f5d8..81123de7 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -208,41 +208,10 @@ }; })(this)); - $("#plink_compute").on("click", (function(_this) { - return function() { - var form_data, url; - //$("#static_progress_bar_container").modal(); - url = "/loading"; - $('input[name=method]').val("plink"); - $('input[name=maf]').val($('input[name=maf_plink]').val()); - form_data = $('#trait_data_form').serialize(); - console.log("form_data is:", form_data); - return submit_special(url); - //return do_ajax_post(url, form_data); - }; - })(this)); - $("#gemma_compute").on("click", (function(_this) { return function() { var form_data, url; console.log("RUNNING GEMMA"); - //$("#static_progress_bar_container").modal(); - url = "/loading"; - $('input[name=method]').val("gemma"); - $('input[name=genofile]').val($('#genofile_gemma').val()); - $('input[name=maf]').val($('input[name=maf_gemma]').val()); - form_data = $('#trait_data_form').serialize(); - console.log("form_data is:", form_data); - return submit_special(url); - //return do_ajax_post(url, form_data); - }; - })(this)); - - $("#gemma_bimbam_compute").on("click", (function(_this) { - return function() { - var form_data, url; - console.log("RUNNING GEMMA"); - //$("#static_progress_bar_container").modal(); url = "/loading"; $('input[name=method]').val("gemma_bimbam"); $('input[name=num_perm]').val(0); @@ -251,7 +220,6 @@ form_data = $('#trait_data_form').serialize(); console.log("form_data is:", form_data); return submit_special(url); - //return do_ajax_post(url, form_data); }; })(this)); diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index d40a7bd6..3d31797a 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -35,7 +35,6 @@