# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Dr. Robert W. Williams at rwilliams@uthsc.edu
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)

import scipy
# ``scipy.stats`` is used below; import the submodule explicitly because a
# bare ``import scipy`` does not load it on older SciPy releases.
import scipy.stats
import simplejson as json

from gn2.base.trait import create_trait
from gn2.base import data_set
from gn2.utility import helper_functions
from gn2.utility import corr_result_helpers
from gn2.utility.tools import GN2_BRANCH_URL


def _node_id(trait):
    """Return the ``<trait name>:<dataset name>`` id used for graph elements."""
    return f"{trait.name!s}:{trait.dataset.name!s}"


def _edge_style(sample_r):
    """Return the ``(color, width)`` pair used to draw an edge.

    The coefficient is bucketed by sign and magnitude (cut-offs at 0.5 and
    0.7).  Any value that matches none of the ranges -- e.g. NaN, for which
    every comparison is false -- gets a black edge of width 0.
    """
    if -1 <= sample_r < -0.7:
        return "#0000ff", 3
    if -0.7 <= sample_r < -0.5:
        return "#00ff00", 2
    if -0.5 <= sample_r < 0:
        return "#000000", 0.5
    if 0 <= sample_r < 0.5:
        return "#ffc0cb", 0.5
    if 0.5 <= sample_r < 0.7:
        return "#ffa500", 2
    if 0.7 <= sample_r <= 1:
        return "#ff0000", 3
    return "#000000", 0


class NetworkGraph:
    """Node/edge data for a correlation network over a list of traits.

    Every trait becomes a node and every ordered pair of distinct traits
    with at least two overlapping samples becomes an edge carrying the
    Pearson correlation between the two traits.

    Attributes set by the constructor:
        all_sample_list: sample names seen in any trait, in first-seen order.
        traits: the trait objects, in ``self.trait_list`` order.
        sample_data: one row per trait with a value (or ``''`` when the
            trait has no entry) for each sample of ``all_sample_list``.
        lowest_overlap: the smallest sample overlap found between two
            traits, capped at 8.
        nodes_list / edges_list: lists of ``{'data': {...}}`` dicts.
        elements: JSON string of ``nodes_list + edges_list``.
        gn2_url: value of ``GN2_BRANCH_URL``.
        js_data: dict bundling the above for the front end.
    """

    def __init__(self, start_vars):
        """Build the graph for the traits named in ``start_vars``.

        Args:
            start_vars: mapping whose ``'trait_list'`` entry is a
                comma-separated string of trait identifiers.
        """
        trait_db_list = [trait.strip()
                         for trait in start_vars['trait_list'].split(',')]

        # NOTE(review): this helper is expected to populate
        # ``self.trait_list`` (read below, never assigned here) with
        # entries indexable as [0] = trait and [1] = dataset -- confirm.
        helper_functions.get_trait_db_obs(self, trait_db_list)

        self._collect_samples()
        self._build_sample_matrix()
        self._build_graph()

        self.elements = json.dumps(self.nodes_list + self.edges_list)
        self.gn2_url = GN2_BRANCH_URL

        trait_indices = list(range(len(self.traits)))
        self.js_data = {
            'traits': [trait.name for trait in self.traits],
            'groups': [1] * len(self.all_sample_list),
            'cols': trait_indices,
            'rows': list(trait_indices),
            'samples': self.all_sample_list,
            'sample_data': self.sample_data,
            'elements': self.elements,
        }

    def _collect_samples(self):
        """Fill ``self.traits`` and the ordered, de-duplicated sample list."""
        self.all_sample_list = []
        self.traits = []
        # Set used only for O(1) membership tests; the list keeps the order.
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)
            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

    def _build_sample_matrix(self):
        """Fill ``self.sample_data`` with one row of values per trait."""
        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append([
                this_sample_data[sample].value
                if sample in this_sample_data else ''
                for sample in self.all_sample_list
            ])

    def _build_graph(self):
        """Fill ``nodes_list``, ``edges_list`` and ``lowest_overlap``."""
        # ZS: Variable set to the lowest overlapping samples in order to
        # notify user, or 8, whichever is lower (since 8 is when we want to
        # display warning)
        self.lowest_overlap = 8

        self.nodes_list = []
        self.edges_list = []
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            this_db = trait_db[1]
            this_sample_data = this_trait.data
            this_id = _node_id(this_trait)

            # ZS: Used to determine whether node should be hidden when
            # correlation coefficient slider is used
            max_corr = 0

            for target in self.trait_list:
                target_trait = target[0]
                target_db = target[1]

                # Never correlate a trait with itself.
                if (str(this_trait) == str(target_trait)
                        and str(this_db) == str(target_db)):
                    continue

                target_sample_data = target_trait.data

                # Keep only the samples measured in both traits.
                this_trait_vals = []
                target_vals = []
                for sample in target_db.group.all_samples_ordered():
                    if (sample in this_sample_data
                            and sample in target_sample_data):
                        this_trait_vals.append(
                            this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = (
                    corr_result_helpers.normalize_values(
                        this_trait_vals, target_vals))

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap
                if num_overlap < 2:
                    # A correlation needs at least two paired values.
                    continue

                sample_r, sample_p = scipy.stats.pearsonr(
                    this_trait_vals, target_vals)
                if sample_r == 1:
                    # Perfectly correlated pairs get no edge (presumably
                    # duplicated traits -- TODO confirm).
                    continue

                color, width = _edge_style(sample_r)

                if abs(sample_r) > max_corr:
                    max_corr = abs(sample_r)

                target_id = _node_id(target_trait)
                edge_data = {'id': this_id + '_to_' + target_id,
                             'source': this_id,
                             'target': target_id,
                             'correlation': round(sample_r, 3),
                             'abs_corr': abs(round(sample_r, 3)),
                             'p_value': round(sample_p, 3),
                             'overlap': num_overlap,
                             'color': color,
                             'width': width}
                self.edges_list.append({'data': edge_data})

            if this_db.type == "ProbeSet":
                node_data = {'id': this_id,
                             'label': this_trait.symbol,
                             'symbol': this_trait.symbol,
                             'geneid': this_trait.geneid,
                             'omim': this_trait.omim,
                             'max_corr': max_corr}
            else:
                # "Publish" and every other dataset type share one layout.
                node_data = {'id': this_id,
                             'label': this_trait.name,
                             'max_corr': max_corr}
            self.nodes_list.append({'data': node_data})