diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/base/data_set.py | 4 | ||||
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 14 | ||||
-rw-r--r-- | wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py | 14 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/correlation_functions.py | 6 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/correlation_gn3_api.py | 205 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/templates/test_correlation_page.html | 159 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 11 |
8 files changed, 405 insertions, 10 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index e20f2f98..75ddf278 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -124,6 +124,7 @@ class DatasetType: self.datasets[short_dataset_name] = new_type except Exception: # Do nothing pass + self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) self.data = data @@ -165,6 +166,7 @@ class DatasetType: if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") + results = g.db.execute(sql_query_mapping[t] % group_name).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] @@ -646,6 +648,8 @@ class DataSet: "Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass + + def get_trait_data(self, sample_list=None): if sample_list: self.samplelist = sample_list diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 882ae911..8f8e2b0a 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -6,6 +6,7 @@ from utility import db_tools from utility import Bunch from utility.db_tools import escape +from gn3.db_utils import database_connector from utility.logger import getLogger @@ -45,16 +46,18 @@ class MrnaAssayTissueData: and t.Mean = x.maxmean; '''.format(in_clause) - results = g.db.execute(query).fetchall() - lower_symbols = [] + # lower_symbols = [] + lower_symbols = {} for gene_symbol in gene_symbols: + # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: - lower_symbols.append(gene_symbol.lower()) - + lower_symbols[gene_symbol.lower()] = True + results = list(g.db.execute(query).fetchall()) for result in results: symbol = result[0] - if symbol.lower() in lower_symbols: + if symbol is not None and lower_symbols.get(symbol.lower()): + symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId @@ -83,6 +86,7 @@ class MrnaAssayTissueData: WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py new file mode 100644 index 00000000..e1bd6d86 --- /dev/null +++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py @@ -0,0 +1,14 @@ +"""this module contains tests for code used in integrating to gn3 api""" +from unittest import TestCase +from base.data_set import create_dataset + +class TestCorrelation(TestCase): + + def test_create_dataset(self): + """test for creating datasets""" + + pass + def test_fetch_dataset_info(self): + """test for fetching dataset info data""" + + pass diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index 0f24241a..2d25fbd8 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss(primaryValue=[], targetValue=[], method='pearso def get_trait_symbol_and_tissue_values(symbol_list=None): tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) - - if len(tissue_data.gene_symbols): - return tissue_data.get_symbol_values_pairs() + if len(tissue_data.gene_symbols) >0: + results = tissue_data.get_symbol_values_pairs() + return results diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py new file mode 100644 index 00000000..98d52591 --- /dev/null +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -0,0 +1,205 @@ +"""module that calls the gn3 api's to do the correlation """ +import json + +from wqflask.correlation import correlation_functions + +from base import data_set + +from base.trait import create_trait +from base.trait import retrieve_sample_data + +from gn3.computations.correlations import compute_all_sample_correlation +from gn3.computations.correlations import map_shared_keys_to_values +from gn3.computations.correlations import compute_all_lit_correlation +from gn3.computations.correlations import compute_tissue_correlation +from gn3.db_utils import database_connector + + +def create_target_this_trait(start_vars): + """this function creates the required trait and target dataset for correlation""" + + this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['corr_dataset']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) + sample_data = () + return (this_dataset, this_trait, target_dataset, sample_data) + + +def process_samples(start_vars, sample_names, excluded_samples=None): + """process samples""" + sample_data = {} + if not excluded_samples: + excluded_samples = () + sample_vals_dict = json.loads(start_vars["sample_vals"]) + for sample in sample_names: + if sample not in excluded_samples: + val = sample_vals_dict[sample] + if not val.strip().lower() == "x": + sample_data[str(sample)] = float(val) + return sample_data + + +def sample_for_trait_lists(corr_results, target_dataset, + this_trait, this_dataset, start_vars): + """interface function for correlation on top results""" + + sample_data = process_samples( + start_vars, this_dataset.group.samplelist) + target_dataset.get_trait_data(list(sample_data.keys())) + # should filter target traits from here + _corr_results = corr_results + + this_trait = retrieve_sample_data(this_trait, this_dataset) + + this_trait_data = { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + results = map_shared_keys_to_values( + target_dataset.samplelist, target_dataset.trait_data) + correlation_results = compute_all_sample_correlation(corr_method="pearson", + this_trait=this_trait_data, + target_dataset=results) + + return correlation_results + + +def tissue_for_trait_lists(corr_results, this_dataset, this_trait): + """interface function for doing tissue corr_results on trait_list""" + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + traits_symbol_dict = this_dataset.retrieve_genes("Symbol") + traits_symbol_dict = dict({trait_name: symbol for ( + trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) + primary_tissue_data, target_tissue_data = get_tissue_correlation_input( + this_trait, traits_symbol_dict) + corr_results = compute_tissue_correlation( + primary_tissue_dict=primary_tissue_data, + target_tissues_data=target_tissue_data, + corr_method="pearson") + return corr_results + + +def lit_for_trait_list(corr_results, this_dataset, this_trait): + (this_trait_geneid, geneid_dict, species) = do_lit_correlation( + this_trait, this_dataset) + + # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + + geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict.items() if + trait_lists.get(trait_name)} + + conn, _cursor_object = database_connector() + + with conn: + + correlation_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) + + return correlation_results + + +def compute_correlation(start_vars, method="pearson"): + """compute correlation for to call gn3 api""" + # pylint: disable-msg=too-many-locals + + corr_type = start_vars['corr_type'] + + (this_dataset, this_trait, target_dataset, + sample_data) = create_target_this_trait(start_vars) + + method = start_vars['corr_sample_method'] + corr_return_results = int(start_vars.get("corr_return_results", 100)) + corr_input_data = {} + + if corr_type == "sample": + + sample_data = process_samples( + start_vars, this_dataset.group.samplelist) + target_dataset.get_trait_data(list(sample_data.keys())) + this_trait = retrieve_sample_data(this_trait, this_dataset) + this_trait_data = { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + results = map_shared_keys_to_values( + target_dataset.samplelist, target_dataset.trait_data) + correlation_results = compute_all_sample_correlation(corr_method=method, + this_trait=this_trait_data, + target_dataset=results) + + # do tissue correaltion + + # code to be use later + + # tissue_result = tissue_for_trait_lists( + # correlation_results, this_dataset, this_trait) + # # lit spoils the party so slow + # lit_result = lit_for_trait_list( + # correlation_results, this_dataset, this_trait) + + + elif corr_type == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + primary_tissue_data, target_tissue_data = get_tissue_correlation_input( + this_trait, trait_symbol_dict) + + corr_input_data = { + "primary_tissue": primary_tissue_data, + "target_tissues_dict": target_tissue_data + } + correlation_results = compute_tissue_correlation( + primary_tissue_dict=corr_input_data["primary_tissue"], + target_tissues_data=corr_input_data[ + "target_tissues_dict"], + corr_method=method + + ) + + elif corr_type == "lit": + (this_trait_geneid, geneid_dict, species) = do_lit_correlation( + this_trait, this_dataset) + + conn, _cursor_object = database_connector() + with conn: + correlation_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) + + return correlation_results[0:corr_return_results] + + +def do_lit_correlation(this_trait, this_dataset): + """function for fetching lit inputs""" + geneid_dict = this_dataset.retrieve_genes("GeneId") + species = this_dataset.group.species.lower() + trait_geneid = this_trait.geneid + return (trait_geneid, geneid_dict, species) + + +def get_tissue_correlation_input(this_trait, trait_symbol_dict): + """Gets tissue expression values for the primary trait and target tissues values""" + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) + primary_tissue_data = { + "this_id": this_trait.name, + "tissue_values": primary_trait_tissue_values + + } + target_tissue_data = { + "trait_symbol_dict": trait_symbol_dict, + "symbol_tissue_vals_dict": corr_result_tissue_vals_dict + } + return (primary_tissue_data, target_tissue_data) + return None diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 9b0b6118..7fc9f955 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -96,6 +96,7 @@ class CorrelationResults: self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0) self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0) + if ('loc_chr' in start_vars and 'min_loc_mb' in start_vars and 'max_loc_mb' in start_vars): @@ -200,6 +201,7 @@ class CorrelationResults: if chr_info.name == trait_object.chr: chr_as_int = order_id + if (float(self.correlation_data[trait][0]) >= self.p_range_lower and float(self.correlation_data[trait][0]) <= self.p_range_upper): diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html new file mode 100644 index 00000000..037e9735 --- /dev/null +++ b/wqflask/wqflask/templates/test_correlation_page.html @@ -0,0 +1,159 @@ +{% extends "base.html" %} +{% block title %}Correlation Results{% endblock %} +{% block css %} + <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='DataTables/css/jquery.dataTables.css') }}" /> + <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonsBootstrap/css/buttons.bootstrap.css') }}" /> + <link rel="stylesheet" type="text/css" href="{{ url_for('js', filename='DataTablesExtensions/buttonStyles/css/buttons.dataTables.min.css') }}"> + <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css"> + <link rel="stylesheet" type="text/css" href="/static/new/css/trait_list.css" /> + <link rel="stylesheet" type="text/css" href="/static/new/css/show_trait.css" /> + + <style type="text/css"> + .td-styles{ + height: 40px; + text-align: center; + } + .trait_col { + font-weight:bolder; + text-align: center; + color:#036ffc; + /*font-size: 1.1em;*/ + } + table th { + font-weight: bolder; + text-transform: uppercase; + } + .correlation-title { + padding:25px 10px; + } + .correlation-title h3 span { + font-weight: bolder; + } + .header-toggle-vis { + padding:10px 5px; + } + .header-toggle-vis button { + border-radius: 5px; + + } + </style> +{% endblock %} + +{% block content %} + +<div class="correlation-title"> + <h3>Correlation Results for <span>Dataset_name</span> against <span><a href="">trait_name</a></span> for the top <span>all</span> Results</h3> +</div> +<div class="header-toggle-vis"> + <h4 style="font-weight: bolder;padding: 5px 3px;">Toggle Columns</h4> + <button class="toggle-vis" data-column="1">Index</button> + <button class="toggle-vis" data-column="2">Trait Name</button> + <button class="toggle-vis" data-column="3">Sample r</button> + <button class="toggle-vis" data-column="4">Sample P(r)</button> + <button class="toggle-vis" data-column="5">Num overlap</button> +</div> + <table id="example" class="display" width="100%"> + <thead> + <tr > + <th></th> + <th>index</th> + <th>trait_name</th> + <th>Sample r</th> + <th>Sample r(p)</th> + <th>N</th> + <th>Tissue r</th> + <th>Tissue r(p)</th> + <th>Lit r</th> + </tr> + </thead> + </table> + +{% endblock %} + +{% block js %} +<script type="text/javascript" src="{{ url_for('js', filename='js_alt/md5.min.js') }}"></script> +<script type="text/javascript" src="/static/new/javascript/search_results.js"></script> + +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/underscore.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='jszip/jszip.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='js_alt/underscore.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/dataTables.buttons.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/buttons/js/buttons.html5.min.js') }}"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/plugins/sorting/natural.js') }}"></script> +<script language="javascript" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/js/all.min.js"></script> +<script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTablesExtensions/scroller/js/dataTables.scroller.min.js') }}"></script> +<script type="text/javascript"> + console.log("running this script") + let correlationResults = {{correlation_results|safe}} + // document.querySelector(".content").innerHTML =correlationResults + // parse the data + let counter = 0; + let corr_type = "tissue"; + correlationResults =correlationResults.map((trait_object)=>{ + let trait_name = Object.keys(trait_object)[0] + + let new_dict = { + "index":counter, + "trait_name":trait_name, + ...trait_object[trait_name] + } + counter++; + return new_dict; + }) + +console.log(correlationResults) + +</script> + +<script type="text/javascript"> + $(document).ready(function() { + let table = $('#example').DataTable( { + "data": correlationResults, + "columns": [ + {"data":corr_type=="sample"?null:"fd","width":"25px"}, + { "data": "index","width":"120px","title":"Index" }, + { "data": "trait_name","title":"TraitName"}, + { "data": "corr_coeffient","defaultContent": "--"}, + { "data": "p_value","defaultContent":"--"}, + { "data": "num_overlap","defaultContent":"--"}, + {"data":"tissue_corr","defaultContent":"--","title":"Tissue r"}, + {"data":"tissue_p_val","defaultContent":"--","title":"Tissue r(p)"}, + {"data":"lit_corr","defaultContent":"--","title":"Lit rho"} + ], + "columnDefs": [ + { + targets:0, + data:null, + defaultContent: '', + orderable: false, + className: 'select-checkbox', + "render":(data,type,row)=>{ + return `<input type="checkbox" class="checkbox trait_checkbox" value="other">` + } + + }, + {className:"trait_col",targets:2}, + {className: "td-styles", targets: "_all"}, + { + "targets":2, + "render":(data,type,row)=>{ + let urlLink = "/show_trait?trait_id=1453207_at&dataset=HC_M2_0606_P" + let traitLink = `<a href=${urlLink}>${data}</a>` + return traitLink + }, + } + + ] + } ); + + $(":button.toggle-vis").on("click",function(e){ + e.preventDefault() + let column = table.column($(this).attr("data-column")); + column.visible(!column.visible()) + console.log($(this).attr("data-column")) + }) +} ); +</script> + +{% endblock %}
\ No newline at end of file diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 276d3019..317c5f63 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -47,6 +47,7 @@ from wqflask.marker_regression import run_mapping from wqflask.marker_regression import display_mapping_results from wqflask.network_graph import network_graph from wqflask.correlation import show_corr_results +from wqflask.correlation.correlation_gn3_api import compute_correlation from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot from wqflask.wgcna import wgcna_analysis @@ -947,8 +948,14 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - template_vars = show_corr_results.CorrelationResults(request.form) - return render_template("correlation_page.html", **template_vars.__dict__) + # template_vars = show_corr_results.CorrelationResults(request.form) + # return render_template("correlation_page.html", **template_vars.__dict__) + + # to test the new correlation api uncomment these lines + + correlation_results = compute_correlation(request.form) + # print(">>>>Time taken by this endpoint",time.time()-initial_time) + return render_template("test_correlation_page.html",correlation_results=correlation_results) @app.route("/corr_matrix", methods=('POST',)) |