From cfe7dee2903bde6b497559e3bef4615a690819c6 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 29 Aug 2017 16:15:46 +0000 Subject: Mostly changes related to getting gemma working with covariates and BIMBAM file format Other changes include: - Fixing links in the network graph - Changing button colors on show trait page - Changing color scheme for heatmap - Making rows highlight in yellow when clicked on - Some changes to table appearance (like giving gene global search a horizontal scroll) --- wqflask/base/webqtlConfig.py | 4 +- wqflask/wqflask/marker_regression/gemma_mapping.py | 151 +++++++++++++++++---- .../wqflask/marker_regression/marker_regression.py | 5 +- .../marker_regression/marker_regression_gn1.py | 19 ++- wqflask/wqflask/network_graph/network_graph.py | 12 +- wqflask/wqflask/show_trait/show_trait.py | 6 +- .../javascript/get_covariates_from_collection.js | 106 ++------------- .../wqflask/static/new/javascript/lodheatmap.js | 2 +- .../wqflask/static/new/javascript/network_graph.js | 4 +- .../wqflask/static/new/javascript/show_trait.js | 23 ++-- .../new/javascript/show_trait_mapping_tools.js | 15 ++ .../packages/DataTables/css/jquery.dataTables.css | 12 +- .../static/packages/bootstrap/css/bootstrap.css | 6 +- .../packages/bootstrap/css/non-responsive.css | 2 +- wqflask/wqflask/templates/collections/list.html | 5 - wqflask/wqflask/templates/gsearch_gene.html | 4 +- .../wqflask/templates/marker_regression_gn1.html | 1 + wqflask/wqflask/templates/search_result_page.html | 2 + wqflask/wqflask/templates/show_trait.html | 1 + .../show_trait_calculate_correlations.html | 2 +- .../templates/show_trait_mapping_tools.html | 62 +++++++-- wqflask/wqflask/views.py | 2 + 22 files changed, 263 insertions(+), 183 deletions(-) diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index e5f10edf..c3f1e76a 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -70,8 +70,8 @@ assert_writable_dir(TMPDIR) CACHEDIR = mk_dir(TMPDIR+'/cache/') # We can no longer write into the git tree: -GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'/generated/') -GENERATED_TEXT_DIR = mk_dir(TMPDIR+'/generated_text/') +GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'generated/') +GENERATED_TEXT_DIR = mk_dir(TMPDIR+'generated_text/') # Make sure we have permissions to access these assert_writable_dir(CACHEDIR) diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 6e2cb972..31e94266 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -1,24 +1,75 @@ import os, math from base import webqtlConfig +from base.trait import GeneralTrait +from base.data_set import create_dataset from utility.tools import flat_files, GEMMA_COMMAND -def run_gemma(this_dataset, samples, vals): +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def run_gemma(this_dataset, samples, vals, covariates, method): """Generates p-values for each marker using GEMMA""" print("INSIDE GEMMA_MAPPING") - gen_pheno_txt_file(this_dataset, vals) + gen_pheno_txt_file(this_dataset, vals, method) + + if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)): + open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name), "w+") - # use GEMMA_RUN in the next one, create a unique temp file + logger.debug("COVARIATES_GEMMA:", covariates) - gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -outdir %s -o %s_output' % (flat_files('mapping'), - this_dataset.group.name, - flat_files('mapping'), - this_dataset.group.name, - webqtlConfig.GENERATED_IMAGE_DIR, - this_dataset.group.name) - print("gemma_command:" + gemma_command) + if covariates != "": + gen_covariates_file(this_dataset, covariates) + if method == "gemma": + #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -o %s_output' % (flat_files('mapping'), + gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'), + this_dataset.group.name, + flat_files('mapping'), + this_dataset.group.name, + flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + # use GEMMA_RUN in the next one, create a unique temp file + else: + logger.debug("FLAT FILES:", flat_files('mapping')) + #gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -o %s_output' % (flat_files('genotype/bimbam'), + gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + else: + if method == "gemma": + #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('mapping'), + gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -outdir %s -o %s_output' % (flat_files('mapping'), + this_dataset.group.name, + flat_files('mapping'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + else: + #gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('genotype/bimbam'), + gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1 -outdir %s -debug -o %s_output' % (flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + flat_files('genotype/bimbam'), + this_dataset.group.name, + webqtlConfig.GENERATED_IMAGE_DIR, + this_dataset.group.name) + logger.debug("gemma_command:" + gemma_command) os.system(gemma_command) @@ -26,27 +77,70 @@ def run_gemma(this_dataset, samples, vals): return marker_obs -def gen_pheno_txt_file(this_dataset, vals): +def gen_pheno_txt_file(this_dataset, vals, method): """Generates phenotype file for GEMMA""" - current_file_data = [] - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "r") as outfile: - for i, line in enumerate(outfile): - split_line = line.split() - current_file_data.append(split_line) - - with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: - for i, line in enumerate(current_file_data): - if vals[i] == "x": - this_val = -9 + if method == "gemma": + current_file_data = [] + with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "r") as outfile: + for i, line in enumerate(outfile): + split_line = line.split() + current_file_data.append(split_line) + + with open("{}/{}.fam".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: + for i, line in enumerate(current_file_data): + if vals[i] == "x": + this_val = -9 + else: + this_val = vals[i] + outfile.write("0" + " " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") + else: + current_file_data = [] + with open("{}/{}_pheno.txt".format(flat_files('genotype/bimbam'), this_dataset.group.name), "w") as outfile: + for value in vals: + if value == "x": + outfile.write("NA\n") + else: + outfile.write(value + "\n") + +def gen_covariates_file(this_dataset, covariates): + covariate_list = covariates.split(",") + covariate_data_object = [] + for covariate in covariate_list: + this_covariate_data = [] + trait_name = covariate.split(":")[0] + dataset_ob = create_dataset(covariate.split(":")[1]) + trait_ob = GeneralTrait(dataset=dataset_ob, + name=trait_name, + cellid=None) + + #trait_samples = this_dataset.group.all_samples_ordered() + this_dataset.group.get_samplelist() + trait_samples = this_dataset.group.samplelist + logger.debug("SAMPLES:", trait_samples) + trait_sample_data = trait_ob.data + logger.debug("SAMPLE DATA:", trait_sample_data) + for index, sample in enumerate(trait_samples): + if sample in trait_sample_data: + sample_value = trait_sample_data[sample].value + this_covariate_data.append(sample_value) else: - this_val = vals[i] - outfile.write("0" + " " + line[1] + " " + line[2] + " " + line[3] + " " + line[4] + " " + str(this_val) + "\n") + this_covariate_data.append("-9") + covariate_data_object.append(this_covariate_data) + + with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile: + for i in range(len(covariate_data_object[0])): + for this_covariate in covariate_data_object: + outfile.write(str(this_covariate[i]) + "\t") + outfile.write("\n") def parse_gemma_output(this_dataset): included_markers = [] p_values = [] marker_obs = [] + previous_chr = 0 + + #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file: with open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)) as output_file: for line in output_file: if line.startswith("chr"): @@ -54,7 +148,16 @@ def parse_gemma_output(this_dataset): else: marker = {} marker['name'] = line.split("\t")[1] - marker['chr'] = int(line.split("\t")[0]) + if line.split("\t")[0] != "X" and line.split("\t")[0] != "X/Y": + marker['chr'] = int(line.split("\t")[0]) + else: + marker['chr'] = line.split("\t")[0] + # try: + # marker['chr'] = int(line.split("\t")[0]) + # except: + # marker['chr'] = previous_chr + 1 + # if marker['chr'] != previous_chr: + # previous_chr = marker['chr'] marker['Mb'] = float(line.split("\t")[2]) / 1000000 marker['p_value'] = float(line.split("\t")[10]) if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 60424468..55bbacac 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -92,6 +92,7 @@ class MarkerRegression(object): self.num_perm = 0 self.perm_output = [] self.bootstrap_results = [] + self.covariates = start_vars['covariates'] #ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 @@ -153,11 +154,11 @@ class MarkerRegression(object): self.genofile_string = start_vars['genofile'] self.dataset.group.genofile = self.genofile_string.split(":")[0] self.dataset.group.get_markers() - if self.mapping_method == "gemma": + if self.mapping_method == "gemma" or self.mapping_method == "gemma_bimbam": self.score_type = "-log(p)" self.manhattan_plot = True with Bench("Running GEMMA"): - marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals) + marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals, self.covariates, self.mapping_method) results = marker_obs elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py index 08332e7d..93d75a03 100644 --- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py +++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py @@ -255,6 +255,8 @@ class MarkerRegression(object): self.controlLocus = start_vars['control_marker'] else: self.controlLocus = "" + if 'covariates' in start_vars.keys(): + self.covariates = start_vars['covariates'] #try: self.selectedChr = int(start_vars['selected_chr']) @@ -1369,7 +1371,10 @@ class MarkerRegression(object): #draw gray blocks for 3' and 5' UTR blocks if cdsStart and cdsEnd: - + logger.debug("txStart:", txStart) + logger.debug("cdsStart:", cdsStart) + logger.debug("txEnd:", txEnd) + logger.debug("cdsEnd:", cdsEnd) utrStartPix = (txStart-startMb)*plotXScale + xLeftOffset utrEndPix = (cdsStart-startMb)*plotXScale + xLeftOffset if (utrStartPix < xLeftOffset): @@ -1380,9 +1385,10 @@ class MarkerRegression(object): utrEndPix = xLeftOffset + plotWidth if (utrStartPix > xLeftOffset + plotWidth): utrStartPix = xLeftOffset + plotWidth - canvas.drawRect(utrStartPix, geneYLocation, utrEndPix, (geneYLocation+self.EACH_GENE_HEIGHT*zoom), edgeColor=utrColor, fillColor =utrColor) + #canvas.drawRect(utrStartPix, geneYLocation, utrEndPix, (geneYLocation+self.EACH_GENE_HEIGHT*zoom), edgeColor=utrColor, fillColor =utrColor) - if self.DRAW_UTR_LABELS and self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: + #if self.DRAW_UTR_LABELS and self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: + if self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: if strand == "-": labelText = "3'" else: @@ -1401,10 +1407,11 @@ class MarkerRegression(object): utrEndPix = xLeftOffset + plotWidth if (utrStartPix > xLeftOffset + plotWidth): utrStartPix = xLeftOffset + plotWidth - canvas.drawRect(utrStartPix, geneYLocation, utrEndPix, (geneYLocation+self.EACH_GENE_HEIGHT*zoom), edgeColor=utrColor, fillColor =utrColor) + #canvas.drawRect(utrStartPix, geneYLocation, utrEndPix, (geneYLocation+self.EACH_GENE_HEIGHT*zoom), edgeColor=utrColor, fillColor =utrColor) - if self.DRAW_UTR_LABELS and self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: - if tstrand == "-": + #if self.DRAW_UTR_LABELS and self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: + if self.endMb - self.startMb <= self.DRAW_UTR_LABELS_MB: + if strand == "-": labelText = "5'" else: labelText = "3'" diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index 7eb5be61..b42904a4 100644 --- a/wqflask/wqflask/network_graph/network_graph.py +++ b/wqflask/wqflask/network_graph/network_graph.py @@ -182,24 +182,18 @@ class NetworkGraph(object): self.edges_list.append(edge_dict) if trait_db[1].type == "ProbeSet": - node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), - 'name' : str(this_trait.name), - 'dataset' : str(this_trait.dataset.name), + node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), 'label' : this_trait.symbol, 'symbol' : this_trait.symbol, 'geneid' : this_trait.geneid, 'omim' : this_trait.omim, 'max_corr' : max_corr } } elif trait_db[1].type == "Publish": - node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), - 'name' : str(this_trait.name), - 'dataset' : str(this_trait.dataset.name), + node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), 'label' : this_trait.name, 'max_corr' : max_corr } } else: - node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), - 'name' : str(this_trait.name), - 'dataset' : str(this_trait.dataset.name), + node_dict = { 'data' : {'id' : str(this_trait.name) + ":" + str(this_trait.dataset.name), 'label' : this_trait.name, 'max_corr' : max_corr } } self.nodes_list.append(node_dict) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index b2dfc4ac..13ad4a8f 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -133,9 +133,9 @@ class ShowTrait(object): self.sample_group_types = OrderedDict() if len(self.sample_groups) > 1: - self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only" - self.sample_group_types['samples_other'] = "Non-" + self.dataset.group.name - self.sample_group_types['samples_all'] = "All Cases" + self.sample_group_types['samples_primary'] = self.dataset.group.name + self.sample_group_types['samples_other'] = "Other" + self.sample_group_types['samples_all'] = "All" else: self.sample_group_types['samples_primary'] = self.dataset.group.name sample_lists = [group.sample_list for group in self.sample_groups] diff --git a/wqflask/wqflask/static/new/javascript/get_covariates_from_collection.js b/wqflask/wqflask/static/new/javascript/get_covariates_from_collection.js index c267b045..92e2b13b 100644 --- a/wqflask/wqflask/static/new/javascript/get_covariates_from_collection.js +++ b/wqflask/wqflask/static/new/javascript/get_covariates_from_collection.js @@ -2,8 +2,6 @@ var add_trait_data, assemble_into_json, back_to_collections, collection_click, collection_list, color_by_trait, create_trait_data_csv, get_this_trait_vals, get_trait_data, process_traits, selected_traits, submit_click, this_trait_data, trait_click, __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; -console.log("before get_traits_from_collection"); - collection_list = null; this_trait_data = null; @@ -25,109 +23,30 @@ collection_click = function() { }; submit_click = function() { - var all_vals, sample, samples, scatter_matrix, this_trait_vals, trait, trait_names, trait_vals_csv, traits, _i, _j, _len, _len1, _ref; - selected_traits = {}; - traits = []; + var covariates_string = ""; $('#collections_holder').find('input[type=checkbox]:checked').each(function() { - var this_dataset, this_trait, this_trait_url; + var this_dataset, this_trait; this_trait = $(this).parents('tr').find('.trait').text(); console.log("this_trait is:", this_trait); this_dataset = $(this).parents('tr').find('.dataset').text(); console.log("this_dataset is:", this_dataset); - this_trait_url = "/trait/get_sample_data?trait=" + this_trait + "&dataset=" + this_dataset; - return $.ajax({ - dataType: "json", - url: this_trait_url, - async: false, - success: add_trait_data - }); + covariates_string += this_trait + ":" + this_dataset + "," }); - console.log("SELECTED_TRAITS IS:", selected_traits); - trait_names = []; - samples = $('input[name=allsamples]').val().split(" "); - all_vals = []; - this_trait_vals = get_this_trait_vals(samples); - all_vals.push(this_trait_vals); - _ref = Object.keys(selected_traits); - for (_i = 0, _len = _ref.length; _i < _len; _i++) { - trait = _ref[_i]; - trait_names.push(trait); - this_trait_vals = []; - for (_j = 0, _len1 = samples.length; _j < _len1; _j++) { - sample = samples[_j]; - if (__indexOf.call(Object.keys(selected_traits[trait]), sample) >= 0) { - this_trait_vals.push(parseFloat(selected_traits[trait][sample])); - } else { - this_trait_vals.push(null); - } - } - all_vals.push(this_trait_vals); - } - trait_vals_csv = create_trait_data_csv(selected_traits); - scatter_matrix = new ScatterMatrix(trait_vals_csv); - scatter_matrix.render(); - return $.colorbox.close(); -}; + // Trim the last comma + covariates_string = covariates_string.substring(0, covariates_string.length - 1) + console.log("COVARIATES:", covariates_string) -create_trait_data_csv = function(selected_traits) { - var all_vals, index, sample, sample_vals, samples, this_trait_vals, trait, trait_names, trait_vals_csv, _i, _j, _k, _l, _len, _len1, _len2, _len3, _ref; - trait_names = []; - trait_names.push($('input[name=trait_id]').val()); - samples = $('input[name=allsamples]').val().split(" "); - all_vals = []; - this_trait_vals = get_this_trait_vals(samples); - all_vals.push(this_trait_vals); - _ref = Object.keys(selected_traits); - for (_i = 0, _len = _ref.length; _i < _len; _i++) { - trait = _ref[_i]; - trait_names.push(trait); - this_trait_vals = []; - for (_j = 0, _len1 = samples.length; _j < _len1; _j++) { - sample = samples[_j]; - if (__indexOf.call(Object.keys(selected_traits[trait]), sample) >= 0) { - this_trait_vals.push(parseFloat(selected_traits[trait][sample])); - } else { - this_trait_vals.push(null); - } - } - all_vals.push(this_trait_vals); - } - console.log("all_vals:", all_vals); - trait_vals_csv = trait_names.join(","); - trait_vals_csv += "\n"; - for (index = _k = 0, _len2 = samples.length; _k < _len2; index = ++_k) { - sample = samples[index]; - if (all_vals[0][index] === null) { - continue; - } - sample_vals = []; - for (_l = 0, _len3 = all_vals.length; _l < _len3; _l++) { - trait = all_vals[_l]; - sample_vals.push(trait[index]); - } - trait_vals_csv += sample_vals.join(","); - trait_vals_csv += "\n"; - } - return trait_vals_csv; + $("input[name=covariates]").val(covariates_string) + + return $.colorbox.close(); }; trait_click = function() { var dataset, this_trait_url, trait; - console.log("Clicking on:", $(this)); trait = $(this).parent().find('.trait').text(); dataset = $(this).parent().find('.dataset').text(); - console.log("BEFORE COVAR:", trait + ":" + dataset) - $('input[name=covariates]').val(trait + ":" + dataset) - console.log("AFTER COVAR:", $('input[name=covariates]').val()) + $("input[name=covariates]").val(trait + ":" + dataset) return $.colorbox.close(); - // this_trait_url = "/trait/get_sample_data?trait=" + trait + "&dataset=" + dataset; - // console.log("this_trait_url", this_trait_url); - // $.ajax({ - // dataType: "json", - // url: this_trait_url, - // success: get_trait_data - // }); - // return $.colorbox.close(); }; add_trait_data = function(trait_data, textStatus, jqXHR) { @@ -140,9 +59,7 @@ add_trait_data = function(trait_data, textStatus, jqXHR) { get_trait_data = function(trait_data, textStatus, jqXHR) { var sample, samples, this_trait_vals, trait_sample_data, vals, _i, _len; - console.log("trait:", trait_data[0]); trait_sample_data = trait_data[1]; - console.log("trait_sample_data:", trait_sample_data); samples = $('input[name=allsamples]').val().split(" "); vals = []; for (_i = 0, _len = samples.length; _i < _len; _i++) { @@ -158,7 +75,6 @@ get_trait_data = function(trait_data, textStatus, jqXHR) { } $('#hidden_inputs').append(''); this_trait_vals = get_this_trait_vals(samples); - console.log("THE LENGTH IS:", $('input[name=vals]').length); return color_by_trait(trait_sample_data); }; @@ -174,7 +90,6 @@ get_this_trait_vals = function(samples) { this_trait_vals.push(null); } } - console.log("this_trait_vals:", this_trait_vals); this_vals_json = '[' + this_trait_vals.toString() + ']'; return this_trait_vals; }; @@ -231,7 +146,6 @@ back_to_collections = function() { return $('#collections_holder').colorbox.resize(); }; -console.log("inside get_traits_from_collection"); $(".collection_line").on("click", collection_click); $("#submit").on("click", submit_click); $(".trait").on("click", trait_click); diff --git a/wqflask/wqflask/static/new/javascript/lodheatmap.js b/wqflask/wqflask/static/new/javascript/lodheatmap.js index f604cd10..965a1d53 100644 --- a/wqflask/wqflask/static/new/javascript/lodheatmap.js +++ b/wqflask/wqflask/static/new/javascript/lodheatmap.js @@ -20,7 +20,7 @@ lodheatmap = function() { chrGap = 8; titlepos = 20; rectcolor = d3.rgb(230, 230, 230); - colors = ["blue", "white", "crimson"]; + colors = ["slateblue", "white", "red"]; title = ""; xlab = "Chromosome"; ylab = ""; diff --git a/wqflask/wqflask/static/new/javascript/network_graph.js b/wqflask/wqflask/static/new/javascript/network_graph.js index 03ef1c98..0129bcae 100644 --- a/wqflask/wqflask/static/new/javascript/network_graph.js +++ b/wqflask/wqflask/static/new/javascript/network_graph.js @@ -6,7 +6,7 @@ window.onload=function() { container: $('#cytoscapeweb'), // container to render in elements: elements_list, - + style: [ // the stylesheet for the graph { selector: 'node', @@ -82,7 +82,7 @@ window.onload=function() { cy.nodes().qtip({ content: function(){ qtip_content = '' - gn_link = ''+''+this.data().id +''+'
' + gn_link = ''+''+this.data().id +''+'
' qtip_content += gn_link if (typeof(this.data().geneid) !== 'undefined'){ ncbi_link = 'NCBI'+'
' diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index f232c6eb..f08965e9 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -74,11 +74,14 @@ return open_trait_selection(); }; })(this)); - d3.select("#select_covariate").on("click", (function(_this) { + d3.select("#select_covariates").on("click", (function(_this) { return function() { return open_covariate_selection(); }; })(this)); + $("#remove_covariates").click(function () { + $("input[name=covariates]").val("") + }); d3.select("#clear_compare_trait").on("click", (function(_this) { return function() { return $('.scatter-matrix-container').remove(); @@ -101,15 +104,15 @@ })(this)); }; open_covariate_selection = function() { - return $('#collections_holder').load('/collections/list?select_covariates #collections_list', (function(_this) { + return $('#collections_holder').load('/collections/list #collections_list', (function(_this) { return function() { $.colorbox({ inline: true, href: "#collections_holder", onComplete: function(){ - console.log("before get script") + console.log("before get cov script") $.getScript("/static/new/javascript/get_covariates_from_collection.js"); - console.log("after get script") + console.log("after get cov script") } }); return $('a.collection_name').attr('onClick', 'return false'); @@ -199,16 +202,16 @@ }; make_table = function() { var header, key, row, row_line, table, the_id, the_rows, value, _i, _len, _ref, _ref1; - header = "Statistic"; + header = "Statistic"; _ref = js_data.sample_group_types; for (key in _ref) { if (!__hasProp.call(_ref, key)) continue; value = _ref[key]; the_id = process_id("column", key); if (Object.keys(_ref).length > 1) { - header += "" + value + ""; + header += "" + value + ""; } else { - header += "Value"; + header += "Value"; } } @@ -221,16 +224,16 @@ } row_line = ""; if (row.url != null) { - row_line += "
" + row.pretty + ""; + row_line += "" + row.pretty + ""; } else { - row_line += "" + row.pretty + ""; + row_line += "" + row.pretty + ""; } _ref1 = js_data.sample_group_types; for (key in _ref1) { if (!__hasProp.call(_ref1, key)) continue; value = _ref1[key]; the_id = process_id(key, row.vn); - row_line += "foo"; + row_line += "foo"; } row_line += ""; the_rows += row_line; diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index d6cd8134..e11aaf4a 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -237,6 +237,21 @@ }; })(this)); + $("#gemma_bimbam_compute").on("click", (function(_this) { + return function() { + var form_data, url; + console.log("RUNNING GEMMA"); + //$("#static_progress_bar_container").modal(); + url = "/loading"; + $('input[name=method]').val("gemma_bimbam"); + $('input[name=maf]').val($('input[name=maf_gemma]').val()); + form_data = $('#trait_data_form').serialize(); + console.log("form_data is:", form_data); + return submit_special(url); + //return do_ajax_post(url, form_data); + }; + })(this)); + $("#interval_mapping_compute").on("click", (function(_this) { return function() { var form_data, url; diff --git a/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css b/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css index a5b9f09c..f376d15e 100644 --- a/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css +++ b/wqflask/wqflask/static/new/packages/DataTables/css/jquery.dataTables.css @@ -80,7 +80,7 @@ table.dataTable tbody tr { background-color: #ffffff; } table.dataTable tbody tr.selected { - background-color: #abb9d3; + background-color: #ffee99; } table.dataTable tbody th, table.dataTable tbody td { @@ -111,13 +111,13 @@ table.dataTable.stripe tbody tr.odd, table.dataTable.display tbody tr.odd { background-color: #f9f9f9; } table.dataTable.stripe tbody tr.odd.selected, table.dataTable.display tbody tr.odd.selected { - background-color: #abb9d3; + background-color: #FFEE99; } table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover { background-color: whitesmoke; } table.dataTable.hover tbody tr:hover.selected, table.dataTable.display tbody tr:hover.selected { - background-color: #a9b7d1; + background-color: #FFEE99; } table.dataTable.order-column tbody tr > .sorting_1, table.dataTable.order-column tbody tr > .sorting_2, @@ -143,7 +143,7 @@ table.dataTable.display tbody tr.odd > .sorting_3, table.dataTable.order-column. background-color: whitesmoke; } table.dataTable.display tbody tr.odd.selected > .sorting_1, table.dataTable.order-column.stripe tbody tr.odd.selected > .sorting_1 { - background-color: #a6b3cd; + background-color: #ffe047; } table.dataTable.display tbody tr.odd.selected > .sorting_2, table.dataTable.order-column.stripe tbody tr.odd.selected > .sorting_2 { background-color: #a7b5ce; @@ -161,7 +161,7 @@ table.dataTable.display tbody tr.even > .sorting_3, table.dataTable.order-column background-color: #fdfdfd; } table.dataTable.display tbody tr.even.selected > .sorting_1, table.dataTable.order-column.stripe tbody tr.even.selected > .sorting_1 { - background-color: #acbad4; + background-color: #ffe047; } table.dataTable.display tbody tr.even.selected > .sorting_2, table.dataTable.order-column.stripe tbody tr.even.selected > .sorting_2 { background-color: #adbbd6; @@ -179,7 +179,7 @@ table.dataTable.display tbody tr:hover > .sorting_3, table.dataTable.order-colum background-color: #eeeeee; } table.dataTable.display tbody tr:hover.selected > .sorting_1, table.dataTable.order-column.hover tbody tr:hover.selected > .sorting_1 { - background-color: #a1aec7; + background-color: #ffe047; } table.dataTable.display tbody tr:hover.selected > .sorting_2, table.dataTable.order-column.hover tbody tr:hover.selected > .sorting_2 { background-color: #a2afc8; diff --git a/wqflask/wqflask/static/packages/bootstrap/css/bootstrap.css b/wqflask/wqflask/static/packages/bootstrap/css/bootstrap.css index 91aaa98f..e6abe790 100644 --- a/wqflask/wqflask/static/packages/bootstrap/css/bootstrap.css +++ b/wqflask/wqflask/static/packages/bootstrap/css/bootstrap.css @@ -3803,7 +3803,7 @@ select[multiple].input-group-sm > .input-group-btn > .btn { .nav-pills > li.active > a:hover, .nav-pills > li.active > a:focus { color: #fff; - background-color: #428bca; + background-color: #3071a9; /* Tab cell background color */ } .nav-stacked > li { float: none; @@ -4485,7 +4485,7 @@ fieldset[disabled] .navbar-inverse .btn-link:focus { z-index: 2; color: #fff; cursor: default; - background-color: #428bca; + background-color: #3071a9; border-color: #428bca; } .pagination > .disabled > span, @@ -4667,7 +4667,7 @@ a.badge:focus { } a.list-group-item.active > .badge, .nav-pills > .active > a > .badge { - color: #428bca; + color: #3071a9; background-color: #fff; } .nav-pills > li > a > .badge { diff --git a/wqflask/wqflask/static/packages/bootstrap/css/non-responsive.css b/wqflask/wqflask/static/packages/bootstrap/css/non-responsive.css index 9da73a8f..d352390f 100644 --- a/wqflask/wqflask/static/packages/bootstrap/css/non-responsive.css +++ b/wqflask/wqflask/static/packages/bootstrap/css/non-responsive.css @@ -97,7 +97,7 @@ body { .navbar .navbar-nav .open .dropdown-menu > .active > a:hover, .navbar .navbar-nav .open .dropdown-menu > .active > a:focus { color: #fff !important; - background-color: #428bca !important; + background-color: #3071a9 !important; } .navbar .navbar-nav .open .dropdown-menu > .disabled > a, .navbar .navbar-nav .open .dropdown-menu > .disabled > a:hover, diff --git a/wqflask/wqflask/templates/collections/list.html b/wqflask/wqflask/templates/collections/list.html index cc60ecff..ad72052e 100644 --- a/wqflask/wqflask/templates/collections/list.html +++ b/wqflask/wqflask/templates/collections/list.html @@ -65,11 +65,6 @@ {% endfor %} - {% if "color_by_trait" in params %} - - {% else %} - - {% endif %} diff --git a/wqflask/wqflask/templates/gsearch_gene.html b/wqflask/wqflask/templates/gsearch_gene.html index c2f687fd..6f2ad0b8 100644 --- a/wqflask/wqflask/templates/gsearch_gene.html +++ b/wqflask/wqflask/templates/gsearch_gene.html @@ -27,8 +27,8 @@
-
- +
+
diff --git a/wqflask/wqflask/templates/marker_regression_gn1.html b/wqflask/wqflask/templates/marker_regression_gn1.html index 5afd134a..c6c6bc23 100644 --- a/wqflask/wqflask/templates/marker_regression_gn1.html +++ b/wqflask/wqflask/templates/marker_regression_gn1.html @@ -27,6 +27,7 @@ + diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html index 93e24cc7..9ad8a53e 100644 --- a/wqflask/wqflask/templates/search_result_page.html +++ b/wqflask/wqflask/templates/search_result_page.html @@ -234,6 +234,7 @@ "autoWidth": false, "deferRender": true, "bSortClasses": false, + "scrollX": true, "scrollY": "600px", "scrollCollapse": false, "scroller": true, @@ -306,6 +307,7 @@ "autoWidth": false, "deferRender": true, "bSortClasses": false, + "scrollY": "600px", "scrollCollapse": false, "scroller": false, "paging": false, diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 7c4dbc60..c2724fea 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -142,6 +142,7 @@ + diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 0e15ce9c..c5f815ce 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -105,7 +105,7 @@
-
diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index bded60d2..1151c74b 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -15,6 +15,9 @@
  • R/qtl
  • +
  • + GEMMA +
  • {% endif %} {# if use_plink_gemma #} {% for mapping_method in dataset.group.mapping_names %} @@ -122,7 +125,7 @@
    -
    @@ -179,7 +182,7 @@ -->
    -
    @@ -286,30 +289,69 @@
    -
    +
    +
    +
    + +
    + +
    +
    +
    + +
    +
    + + +
    +
    + +
    +
    + +
    +
    +
    {% endif %} {# if use_plink_gemma #} {% if dataset.group.mapping_id == "2" or dataset.group.mapping_id == "4" %} -
    -
    +
    +
    - +
    +
    +
    + + +
    +
    +
    - -
    -
    @@ -329,7 +371,7 @@
    -
    diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 83496000..2d4fd0f2 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -514,6 +514,7 @@ def loading_page(): 'bootCheck', 'bootstrap_results', 'LRSCheck', + 'covariates', 'maf', 'manhattan_plot', 'control_marker', @@ -569,6 +570,7 @@ def marker_regression_page(): 'bootCheck', 'bootstrap_results', 'LRSCheck', + 'covariates', 'maf', 'manhattan_plot', 'control_marker', -- cgit v1.2.3 From 4f06516befcb16b802afcb81bae28b55279a3c1e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 4 Sep 2017 02:15:11 -0500 Subject: gemma-wrapper: find binary in profile --- bin/genenetwork2 | 1 + etc/default_settings.py | 1 + wqflask/utility/tools.py | 12 ++++++++---- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/genenetwork2 b/bin/genenetwork2 index 236d8f63..e07a4e32 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -78,6 +78,7 @@ else export PLINK_COMMAND="$GN2_PROFILE/bin/plink2" export PYLMM_COMMAND="$GN2_PROFILE/bin/pylmm_redis" export GEMMA_COMMAND="$GN2_PROFILE/bin/gemma" + export GEMMA_WRAPPER_COMMAND="$GN2_PROFILE/bin/gemma-wrapper" fi if [ -z $PYTHONPATH ] ; then echo "ERROR PYTHONPATH has not been set - use GN2_PROFILE!" diff --git a/etc/default_settings.py b/etc/default_settings.py index 1b609414..c00f6c8f 100644 --- a/etc/default_settings.py +++ b/etc/default_settings.py @@ -69,3 +69,4 @@ JS_GN_PATH = os.environ['HOME']+"/genenetwork/javascript" # PYLMM_COMMAND = str.strip(os.popen("which pylmm_redis").read()) # PLINK_COMMAND = str.strip(os.popen("which plink2").read()) # GEMMA_COMMAND = str.strip(os.popen("which gemma").read()) +# GEMMA_WRAPPER_COMMAND = str.strip(os.popen("which gemma-wrapper").read()) diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 867bc5c8..c5685cdd 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -113,6 +113,9 @@ def pylmm_command(guess=None): def gemma_command(guess=None): return assert_bin(get_setting("GEMMA_COMMAND",guess)) +def gemma_wrapper_command(guess=None): + return assert_bin(get_setting("GEMMA_WRAPPER_COMMAND",guess)) + def plink_command(guess=None): return assert_bin(get_setting("PLINK_COMMAND",guess)) @@ -240,10 +243,11 @@ JS_GUIX_PATH = get_setting('JS_GUIX_PATH') JS_GN_PATH = get_setting('JS_GN_PATH') # assert_dir(JS_GN_PATH) -PYLMM_COMMAND = pylmm_command() -GEMMA_COMMAND = gemma_command() -PLINK_COMMAND = plink_command() -TEMPDIR = tempdir() # defaults to UNIX TMPDIR +PYLMM_COMMAND = pylmm_command() +GEMMA_COMMAND = gemma_command() +GEMMA_WRAPPER_COMMAND = gemma_wrapper_command() +PLINK_COMMAND = plink_command() +TEMPDIR = tempdir() # defaults to UNIX TMPDIR # ---- Handle specific JS modules JS_TWITTER_POST_FETCHER_PATH = get_setting("JS_TWITTER_POST_FETCHER_PATH",js_path("Twitter-Post-Fetcher")) -- cgit v1.2.3 From 03e2facd6279f5413667dfcca4b2d223a54e4f4e Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 5 Sep 2017 16:48:55 +0000 Subject: Added file converting genofiles to bimbam --- wqflask/maintenance/convert_geno_to_bimbam.py | 239 ++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 wqflask/maintenance/convert_geno_to_bimbam.py diff --git a/wqflask/maintenance/convert_geno_to_bimbam.py b/wqflask/maintenance/convert_geno_to_bimbam.py new file mode 100644 index 00000000..05006d5c --- /dev/null +++ b/wqflask/maintenance/convert_geno_to_bimbam.py @@ -0,0 +1,239 @@ +#!/usr/bin/python + +""" +Convert .geno files to json + +This file goes through all of the genofiles in the genofile directory (.geno) +and converts them to json files that are used when running the marker regression +code + +""" + +from __future__ import print_function, division, absolute_import +import sys +sys.path.append("..") +import os +import glob +import traceback +import gzip + +#import numpy as np +#from pyLMM import lmm + +import simplejson as json + +from pprint import pformat as pf + +class EmptyConfigurations(Exception): pass + + + +class Marker(object): + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + +class ConvertGenoFile(object): + + def __init__(self, input_file, output_files): + + self.input_file = input_file + self.output_files = output_files + + self.mb_exists = False + self.cm_exists = False + self.markers = [] + + self.latest_row_pos = None + self.latest_col_pos = None + + self.latest_row_value = None + self.latest_col_value = None + + def convert(self): + + self.haplotype_notation = { + '@mat': "1", + '@pat': "0", + '@het': "0.5", + '@unk': "NA" + } + + self.configurations = {} + #self.skipped_cols = 3 + + #if self.input_file.endswith(".geno.gz"): + # print("self.input_file: ", self.input_file) + # self.input_fh = gzip.open(self.input_file) + #else: + self.input_fh = open(self.input_file) + + with open(self.output_files[0], "w") as self.geno_fh: + #if self.file_type == "geno": + self.process_csv() + #elif self.file_type == "snps": + # self.process_snps_file() + + + def process_csv(self): + for row_count, row in enumerate(self.process_rows()): + row_items = row.split("\t") + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper().strip() in self.configurations: + this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + else: + this_marker.genotypes.append("NA") + + #print("this_marker is:", pf(this_marker.__dict__)) + #if this_marker.chr == "14": + self.markers.append(this_marker.__dict__) + + self.write_to_bimbam() + + # with open(self.output_file, 'w') as fh: + # json.dump(self.markers, fh, indent=" ", sort_keys=True) + + # print('configurations:', str(configurations)) + #self.latest_col_pos = item_count + self.skipped_cols + #self.latest_col_value = item + + #if item_count != 0: + # self.output_fh.write(" ") + #self.output_fh.write(self.configurations[item.upper()]) + + #self.output_fh.write("\n") + + def write_to_bimbam(self): + with open(self.output_files[0], "w") as geno_fh: + # geno_fh.write(str(len(self.sample_list)) + "\n") + # geno_fh.write("2\n") + # geno_fh.write("IND") + # for sample in self.sample_list: + # geno_fh.write(" " + sample) + # geno_fh.write("\n") + for marker in self.markers: + geno_fh.write(marker['name']) + geno_fh.write(", X, Y") + geno_fh.write(", " + ", ".join(marker['genotypes'])) + geno_fh.write("\n") + + #pheno_fh = open(self.output_files[1], 'w') + with open(self.output_files[1], "w") as pheno_fh: + for sample in self.sample_list: + pheno_fh.write("1\n") + + with open(self.output_files[2], "w") as snp_fh: + for marker in self.markers: + if self.mb_exists: + snp_fh.write(marker['name'] +", " + str(int(float(marker['Mb'])*1000000)) + ", " + marker['chr'] + "\n") + else: + snp_fh.write(marker['name'] +", " + str(int(float(marker['cM'])*1000000)) + ", " + marker['chr'] + "\n") + + + def get_sample_list(self, row_contents): + self.sample_list = [] + if self.mb_exists: + if self.cm_exists: + self.sample_list = row_contents[4:] + else: + self.sample_list = row_contents[3:] + else: + if self.cm_exists: + self.sample_list = row_contents[3:] + else: + self.sample_list = row_contents[2:] + + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + #if self.input_file.endswith(".geno.gz"): + # print("row: ", row) + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + self.get_sample_list(row.split()) + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row + + @classmethod + def process_all(cls, old_directory, new_directory): + os.chdir(old_directory) + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue + group_name = ".".join(input_file.split('.')[:-1]) + geno_output_file = os.path.join(new_directory, group_name + "_geno.txt") + pheno_output_file = os.path.join(new_directory, group_name + "_pheno.txt") + snp_output_file = os.path.join(new_directory, group_name + "_snps.txt") + output_files = [geno_output_file, pheno_output_file, snp_output_file] + print("%s -> %s" % ( + os.path.join(old_directory, input_file), geno_output_file)) + convertob = ConvertGenoFile(input_file, output_files) + try: + convertob.convert() + except EmptyConfigurations as why: + print(" No config info? Continuing...") + #excepted = True + continue + except Exception as why: + + print(" Exception:", why) + print(traceback.print_exc()) + print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, + convertob.latest_col_pos)) + print(" Column is:", convertob.latest_col_value) + print(" Row is:", convertob.latest_row_value) + break + + #def process_snps_file(cls, snps_file, new_directory): + # output_file = os.path.join(new_directory, "mouse_families.json") + # print("%s -> %s" % (snps_file, output_file)) + # convertob = ConvertGenoFile(input_file, output_file) + + +if __name__=="__main__": + Old_Geno_Directory = """/home/zas1024/genotype_files/genotype/""" + New_Geno_Directory = """/home/zas1024/genotype_files/genotype/bimbam/""" + #Input_File = """/home/zas1024/gene/genotype_files/genotypes/BXD.geno""" + #Output_File = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/bxd.snps""" + #convertob = ConvertGenoFile("/home/zas1024/gene/genotype_files/genotypes/SRxSHRSPF2.geno", "/home/zas1024/gene/genotype_files/new_genotypes/SRxSHRSPF2.json") + #convertob.convert() + ConvertGenoFile.process_all(Old_Geno_Directory, New_Geno_Directory) + #ConvertGenoFiles(Geno_Directory) + + #process_csv(Input_File, Output_File) \ No newline at end of file -- cgit v1.2.3 From 425f0e9d8977f8cf741f596315a56c91b750988a Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 7 Sep 2017 16:13:42 +0000 Subject: Added the script to convert bimbam to kinship matrices --- .../maintenance/generate_kinship_from_bimbam.py | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 wqflask/maintenance/generate_kinship_from_bimbam.py diff --git a/wqflask/maintenance/generate_kinship_from_bimbam.py b/wqflask/maintenance/generate_kinship_from_bimbam.py new file mode 100644 index 00000000..f322341d --- /dev/null +++ b/wqflask/maintenance/generate_kinship_from_bimbam.py @@ -0,0 +1,59 @@ +#!/usr/bin/python + +""" +Generate relatedness matrix files for GEMMA from BIMBAM genotype/phenotype files + +This file goes through all of the BIMBAM files in the bimbam diretory +and uses GEMMA to generate their corresponding kinship/relatedness matrix file + +""" + +from __future__ import print_function, division, absolute_import +import sys +sys.path.append("..") +import os +import glob + +class GenerateKinshipMatrices(object): + def __init__(self, group_name, geno_file, pheno_file): + self.group_name = group_name + self.geno_file = geno_file + self.pheno_file = pheno_file + + def generate_kinship(self): + gemma_command = "/gnu/store/xhzgjr0jvakxv6h3blj8z496xjig69b0-profile/bin/gemma -g " + self.geno_file + " -p " + self.pheno_file + " -gk 1 -outdir /home/zas1024/genotype_files/genotype/bimbam/ -o " + self.group_name + print("command:", gemma_command) + os.system(gemma_command) + + @classmethod + def process_all(self, geno_dir, bimbam_dir): + os.chdir(geno_dir) + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue + group_name = ".".join(input_file.split('.')[:-1]) + geno_input_file = os.path.join(bimbam_dir, group_name + "_geno.txt") + pheno_input_file = os.path.join(bimbam_dir, group_name + "_pheno.txt") + convertob = GenerateKinshipMatrices(group_name, geno_input_file, pheno_input_file) + try: + convertob.generate_kinship() + except EmptyConfigurations as why: + print(" No config info? Continuing...") + continue + except Exception as why: + + print(" Exception:", why) + print(traceback.print_exc()) + print(" Found in row %s at tabular column %s" % (convertob.latest_row_pos, + convertob.latest_col_pos)) + print(" Column is:", convertob.latest_col_value) + print(" Row is:", convertob.latest_row_value) + break + + +if __name__=="__main__": + Geno_Directory = """/home/zas1024/genotype_files/genotype/""" + Bimbam_Directory = """/home/zas1024/genotype_files/genotype/bimbam/""" + GenerateKinshipMatrices.process_all(Geno_Directory, Bimbam_Directory) + + #./gemma -g /home/zas1024/genotype_files/genotype/bimbam/BXD_geno.txt -p /home/zas1024/genotype_files/genotype/bimbam/BXD_pheno.txt -gk 1 -o BXD \ No newline at end of file -- cgit v1.2.3