From 1a663f987bf3a640d21c2c89402318d5433efd9e Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 4 Jun 2020 14:23:30 -0500
Subject: Really should have split this into many more commits:

- Now use proxy to pull trait data and hide traits/results that the user doesn't have view permission for
- Created a factory method for creating trait objects so it can return None when the user doesn't have view permissions (this is why such a large number of files are changed)
- Added metadata to permutation export
- Added current group management code
- Fixed the password verification e-mail code
---
 wqflask/base/trait.py | 177 ++++----
 wqflask/utility/helper_functions.py | 11 +-
 wqflask/utility/redis_tools.py | 58 +--
 wqflask/wqflask/api/correlation.py | 472 ++++++++++-----------
 wqflask/wqflask/api/gen_menu.py | 11 +-
 wqflask/wqflask/api/mapping.py | 4 +-
 wqflask/wqflask/collect.py | 15 +-
 .../comparison_bar_chart/comparison_bar_chart.py | 4 +-
 wqflask/wqflask/correlation/corr_scatter_plot.py | 8 +-
 wqflask/wqflask/correlation/show_corr_results.py | 8 +-
 .../wqflask/correlation_matrix/show_corr_matrix.py | 22 +-
 wqflask/wqflask/ctl/ctl_analysis.py | 10 +-
 wqflask/wqflask/do_search.py | 11 +-
 wqflask/wqflask/gsearch.py | 13 +-
 .../marker_regression/display_mapping_results.py | 6 +
 wqflask/wqflask/marker_regression/gemma_mapping.py | 4 +-
 wqflask/wqflask/marker_regression/rqtl_mapping.py | 64 ++-
 wqflask/wqflask/marker_regression/run_mapping.py | 3 +-
 wqflask/wqflask/network_graph/network_graph.py | 4 +-
 wqflask/wqflask/search_results.py | 106 ++---
 wqflask/wqflask/show_trait/export_trait_data.py | 4 +-
 wqflask/wqflask/show_trait/show_trait.py | 21 +-
 wqflask/wqflask/templates/admin/group_manager.html | 45 +-
 wqflask/wqflask/templates/correlation_page.html | 1 +
 wqflask/wqflask/templates/email/verification.txt | 7 -
 wqflask/wqflask/templates/gsearch_pheno.html | 2 +-
 wqflask/wqflask/templates/mapping_results.html | 31 +-
 wqflask/wqflask/user_login.py | 43 +-
 wqflask/wqflask/user_session.py | 18 +-
 wqflask/wqflask/views.py | 71 +++-
 30 files changed, 637 insertions(+), 617 deletions(-)
 delete mode 100644 wqflask/wqflask/templates/email/verification.txt

diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 1b7cb23c..b133bf21 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -11,6 +11,7 @@ from base.data_set import create_dataset
 from db import webqtlDatabaseFunction
 from utility import webqtlUtil
 from utility import hmac
+from utility.authentication_tools import check_resource_availability
 from utility.tools import GN2_BASE_URL
 from utility.redis_tools import get_redis_conn
 Redis = get_redis_conn()
@@ -21,11 +22,33 @@ import simplejson as json
 from MySQLdb import escape_string as escape
 from pprint import pformat as pf
-from flask import Flask, g, request, url_for
+from flask import Flask, g, request, url_for, redirect
 from utility.logger import getLogger
 logger = getLogger(__name__ )
+def create_trait(**kw):
+    assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. 
or name"; + + permitted = True + if kw.get('name'): + if kw.get('dataset_name'): + if kw.get('dataset_name') != "Temp": + dataset = create_dataset(kw.get('dataset_name')) + else: + dataset = kw.get('dataset') + + if kw.get('dataset_name') != "Temp": + if dataset.type == 'Publish': + permitted = check_resource_availability(dataset, kw.get('name')) + else: + permitted = check_resource_availability(dataset) + + if permitted: + return GeneralTrait(**kw) + else: + return None + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -50,6 +73,7 @@ class GeneralTrait(object): self.haveinfo = kw.get('haveinfo', False) self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet self.data = kw.get('data', {}) + self.view = True # Sets defaults self.locus = None @@ -77,6 +101,7 @@ class GeneralTrait(object): # So we could add a simple if statement to short-circuit this if necessary if self.dataset.type != "Temp": self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info) + if get_sample_info != False: self = retrieve_sample_data(self, self.dataset) @@ -212,26 +237,28 @@ def get_sample_data(): trait = params['trait'] dataset = params['dataset'] - trait_ob = GeneralTrait(name=trait, dataset_name=dataset) - - trait_dict = {} - trait_dict['name'] = trait - trait_dict['db'] = dataset - trait_dict['type'] = trait_ob.dataset.type - trait_dict['group'] = trait_ob.dataset.group.name - trait_dict['tissue'] = trait_ob.dataset.tissue - trait_dict['species'] = trait_ob.dataset.group.species - trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) - trait_dict['description'] = trait_ob.description_display - if trait_ob.dataset.type == "ProbeSet": - trait_dict['symbol'] = trait_ob.symbol - trait_dict['location'] = trait_ob.location_repr - elif trait_ob.dataset.type == "Publish": - if trait_ob.pubmed_id: - trait_dict['pubmed_link'] = trait_ob.pubmed_link - trait_dict['pubmed_text'] = trait_ob.pubmed_text - - return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + trait_ob = create_trait(name=trait, dataset_name=dataset) + if trait_ob: + trait_dict = {} + trait_dict['name'] = trait + trait_dict['db'] = dataset + trait_dict['type'] = trait_ob.dataset.type + trait_dict['group'] = trait_ob.dataset.group.name + trait_dict['tissue'] = trait_ob.dataset.tissue + trait_dict['species'] = trait_ob.dataset.group.species + trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) + trait_dict['description'] = trait_ob.description_display + if trait_ob.dataset.type == "ProbeSet": + trait_dict['symbol'] = trait_ob.symbol + trait_dict['location'] = trait_ob.location_repr + elif trait_ob.dataset.type == "Publish": + if trait_ob.pubmed_id: + trait_dict['pubmed_link'] = trait_ob.pubmed_link + trait_dict['pubmed_text'] = trait_ob.pubmed_text + + return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + else: + return None def jsonable(trait): """Return a dict suitable for using as json @@ -350,91 +377,36 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): assert dataset, "Dataset doesn't exist" if dataset.type == 'Publish': - resource_id = hmac.data_hmac("{}:{}".format(dataset.id, trait.name)) - - the_url = "http://localhost:8080/run_action/?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) - trait_data = 
json.loads(requests.get("http://localhost:8080/run_action/?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id))) - - query = """ - SELECT - PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID, - Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description, - Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, PublishXRef.mean, - Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users, - Publication.Authors, Publication.Title, Publication.Abstract, - Publication.Journal, Publication.Volume, Publication.Pages, - Publication.Month, Publication.Year, PublishXRef.Sequence, - Phenotype.Units, PublishXRef.comments - FROM - PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet - WHERE - PublishXRef.Id = %s AND - Phenotype.Id = PublishXRef.PhenotypeId AND - Publication.Id = PublishXRef.PublicationId AND - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishXRef.InbredSetId = InbredSet.Id AND - PublishFreeze.Id = %s - """ % (trait.name, dataset.id) - - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - - - #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. + resource_id = hmac.hmac_creation("{}:{}:{}".format('dataset-publish', dataset.id, trait.name)) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) elif dataset.type == 'ProbeSet': - display_fields_string = ', ProbeSet.'.join(dataset.display_fields) - display_fields_string = 'ProbeSet.' + display_fields_string - query = """ - SELECT %s - FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSet.Name = '%s' - """ % (escape(display_fields_string), - escape(dataset.name), - escape(str(trait.name))) - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name - # to avoid the problem of same marker name from different species. - elif dataset.type == 'Geno': - display_fields_string = string.join(dataset.display_fields,',Geno.') - display_fields_string = 'Geno.' 
+ display_fields_string
-        query = """
-                SELECT %s
-                FROM Geno, GenoFreeze, GenoXRef
-                WHERE
-                        GenoXRef.GenoFreezeId = GenoFreeze.Id AND
-                        GenoXRef.GenoId = Geno.Id AND
-                        GenoFreeze.Name = '%s' AND
-                        Geno.Name = '%s'
-                """ % (escape(display_fields_string),
-                       escape(dataset.name),
-                       escape(trait.name))
-        logger.sql(query)
-        trait_info = g.db.execute(query).fetchone()
-    else: #Temp type
-        query = """SELECT %s FROM %s WHERE Name = %s"""
-        logger.sql(query)
-        trait_info = g.db.execute(query,
-                                  (string.join(dataset.display_fields,','),
-                                   dataset.type, trait.name)).fetchone()
+        resource_id = hmac.hmac_creation("{}:{}".format('dataset-probeset', dataset.id))
+        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name)
+    else:
+        resource_id = hmac.hmac_creation("{}:{}".format('dataset-geno', dataset.id))
+        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name)
+
+    try:
+        response = requests.get(the_url).content
+        if response.strip() == "no-access":
+            trait.view = False
+            return trait
+    except:
+        resource_info = get_resource_info(resource_id)
+        default_permissions = resource_info['default_mask']['data']
+        if 'view' not in default_permissions:
+            trait.view = False
+            return trait
+
+    trait_info = json.loads(response)
     if trait_info:
         trait.haveinfo = True
-        #XZ: assign SQL query result to trait attributes.
         for i, field in enumerate(dataset.display_fields):
             holder = trait_info[i]
-            # if isinstance(trait_info[i], basestring):
-            #     logger.debug("HOLDER:", holder)
-            #     logger.debug("HOLDER2:", holder.decode(encoding='latin1'))
-            #     holder = unicode(trait_info[i], "utf-8", "ignore")
-            if isinstance(trait_info[i], basestring):
-                holder = holder.encode('latin1')
+            #if isinstance(trait_info[i], basestring):
+            #    holder = holder.encode('latin1')
             setattr(trait, field, holder)
         if dataset.type == 'Publish':
@@ -453,13 +425,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
             if trait.confidential:
                 trait.abbreviation = trait.pre_publication_abbreviation
                 trait.description_display = trait.pre_publication_description
-
-                #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
-                #    privilege=self.dataset.privilege,
-                #    userName=self.dataset.userName,
-                #    authorized_users=self.authorized_users):
-                #
-                #    description = self.pre_publication_description
             else:
                 trait.abbreviation = trait.post_publication_abbreviation
                 if description:
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index e7c04fef..9ce809b6 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import, print_function, division
-from base.trait import GeneralTrait
 from base import data_set
+from base.trait import create_trait
 from base.species import TheSpecies
 from utility import hmac
@@ -11,7+11,6 @@
 from flask import Flask, g
 import logging
 logger = logging.getLogger(__name__ )
-
 def get_species_dataset_trait(self, start_vars):
     #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
     if "temp_trait" in start_vars.keys():
@@ -24,7 +23,7 @@ def get_species_dataset_trait(self, start_vars):
     logger.debug("After creating dataset")
     self.species = TheSpecies(dataset=self.dataset)
     logger.debug("After creating species")
-    self.this_trait = GeneralTrait(dataset=self.dataset,
+    self.this_trait = 
create_trait(dataset=self.dataset, name=start_vars['trait_id'], cellid=None, get_qtl_info=True) @@ -34,7 +33,6 @@ def get_species_dataset_trait(self, start_vars): #self.dataset.group.read_genotype_file() #self.genotype = self.dataset.group.genotype - def get_trait_db_obs(self, trait_db_list): if isinstance(trait_db_list, basestring): trait_db_list = trait_db_list.split(",") @@ -49,10 +47,11 @@ def get_trait_db_obs(self, trait_db_list): dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2]) else: dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) - self.trait_list.append((trait_ob, dataset_ob)) + if trait_ob: + self.trait_list.append((trait_ob, dataset_ob)) def get_species_groups(): diff --git a/wqflask/utility/redis_tools.py b/wqflask/utility/redis_tools.py index 15841032..0ad96879 100644 --- a/wqflask/utility/redis_tools.py +++ b/wqflask/utility/redis_tools.py @@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import import uuid import simplejson as json +import datetime import redis # used for collections @@ -96,15 +97,22 @@ def get_user_groups(user_id): for key in groups_list: group_ob = json.loads(groups_list[key]) group_admins = set(group_ob['admins']) - group_users = set(group_ob['users']) + group_members = set(group_ob['members']) if user_id in group_admins: admin_group_ids.append(group_ob['id']) - elif user_id in group_users: + elif user_id in group_members: user_group_ids.append(group_ob['id']) else: continue - return admin_group_ids, user_group_ids + admin_groups = [] + user_groups = [] + for the_id in admin_group_ids: + admin_groups.append(get_group_info(the_id)) + for the_id in user_group_ids: + user_groups.append(get_group_info(the_id)) + + return admin_groups, user_groups def get_group_info(group_id): group_json = Redis.hget("groups", group_id) @@ -114,18 +122,18 @@ def get_group_info(group_id): return group_info -def create_group(admin_member_ids, user_member_ids = [], group_name = ""): +def create_group(admin_user_ids, member_user_ids = [], group_name = "Default Group Name"): group_id = str(uuid.uuid4()) new_group = { "id" : group_id, - "admins": admin_member_ids, - "users" : user_member_ids, + "admins": admin_user_ids, + "members" : member_user_ids, "name" : group_name, "created_timestamp": datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), "changed_timestamp": datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') } - Redis.hset("groups", group_id, new_group) + Redis.hset("groups", group_id, json.dumps(new_group)) return new_group @@ -144,7 +152,7 @@ def add_users_to_group(user_id, group_id, user_emails = [], admins = False): #ZS if admins: group_users = set(group_info["admins"]) else: - group_users = set(group_info["users"]) + group_users = set(group_info["members"]) for email in user_emails: user_id = get_user_id("email_address", email) @@ -153,7 +161,7 @@ def add_users_to_group(user_id, group_id, user_emails = [], admins = False): #ZS if admins: group_info["admins"] = list(group_users) else: - group_info["users"] = list(group_users) + group_info["members"] = list(group_users) group_info["changed_timestamp"] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') Redis.hset("groups", group_id, json.dumps(group_info)) @@ -161,7 +169,7 @@ def add_users_to_group(user_id, group_id, user_emails = [], admins = False): #ZS else: return None -def 
remove_users_from_group(user_id, users_to_remove_ids, group_id, user_type = "users"): #ZS: User type is because I assume admins can remove other admins +def remove_users_from_group(user_id, users_to_remove_ids, group_id, user_type = "members"): #ZS: User type is because I assume admins can remove other admins group_info = get_group_info(group_id) if user_id in group_info["admins"]: group_users = set(group_info[user_type]) @@ -174,6 +182,7 @@ def change_group_name(user_id, group_id, new_name): group_info = get_group_info(group_id) if user_id in group_info["admins"]: group_info["name"] = new_name + Redis.hset("groups", group_id, json.dumps(group_info)) return group_info else: return None @@ -182,22 +191,21 @@ def get_resources(): resource_list = Redis.hgetall("resources") return resource_list -def get_resource_id(dataset_type, dataset_id, trait_id = None, all_resources = None): - if not all_resources: - all_resources = get_resources() - - resource_list = [[key, json.loads(value)] for key, value in all_resources.items()] - - if not trait_id: - matched_resources = [resource[0] for resource in resource_list if resource[1]['data']['dataset'] == dataset_id] - else: - matched_resources = [resource[0] for resource in resource_list if resource[1]['data']['dataset'] == dataset_id and resource[1]['data']['trait'] == trait_id] - - if len(matched_resources): - return matched_resources[0] +def get_resource_id(dataset, trait_id=None): + if dataset.type == "Publish": + if trait_id: + resource_id = hmac.hmac_creation("{}:{}:{}".format('dataset-publish', dataset.id, trait_id)) + else: + return False + elif dataset.type == "ProbeSet": + resource_id = hmac.hmac_creation("{}:{}".format('dataset-probeset', dataset.id)) + elif dataset.type == "Geno": + resource_id = hmac.hmac_creation("{}:{}".format('dataset-geno', dataset.id)) else: return False + return resource_id + def get_resource_info(resource_id): resource_info = Redis.hget("resources", resource_id) return json.loads(resource_info) @@ -205,9 +213,9 @@ def get_resource_info(resource_id): def add_resource(resource_info): if 'trait' in resource_info['data']: - resource_id = hmac.data_hmac('{}:{}'.format(str(resource_info['data']['dataset']), str(resource_info['data']['trait']))) + resource_id = hmac.hmac_creation('{}:{}:{}'.format(str(resource_info['type']), str(resource_info['data']['dataset']), str(resource_info['data']['trait']))) else: - resource_id = hmac.data_hmac('{}'.format(str(resource_info['data']['dataset']))) + resource_id = hmac.hmac_creation('{}:{}'.format(str(resource_info['type']), str(resource_info['data']['dataset']))) Redis.hset("resources", resource_id, json.dumps(resource_info)) diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py index 66eb94ac..7f5312c1 100644 --- a/wqflask/wqflask/api/correlation.py +++ b/wqflask/wqflask/api/correlation.py @@ -1,237 +1,237 @@ -from __future__ import absolute_import, division, print_function - -import collections - -import scipy - -from MySQLdb import escape_string as escape - -from flask import g - -from base import data_set -from base.trait import GeneralTrait, retrieve_sample_data - -from wqflask.correlation.show_corr_results import generate_corr_json -from wqflask.correlation import correlation_functions - -from utility import webqtlUtil, helper_functions, corr_result_helpers -from utility.benchmark import Bench - -import utility.logger -logger = utility.logger.getLogger(__name__ ) - -def do_correlation(start_vars): - assert('db' in start_vars) - assert('target_db' in 
start_vars) - assert('trait_id' in start_vars) - - this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) - target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) - this_trait = GeneralTrait(dataset = this_dataset, name = start_vars['trait_id']) - this_trait = retrieve_sample_data(this_trait, this_dataset) - - corr_params = init_corr_params(start_vars) - - corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) - #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) - - final_results = [] - for _trait_counter, trait in enumerate(corr_results.keys()[:corr_params['return_count']]): - if corr_params['type'] == "tissue": - [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] - result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p, - "symbol" : symbol - } - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": - [gene_id, sample_r] = corr_results[trait] - result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "gene_id" : gene_id - } - else: - [sample_r, sample_p, num_overlap] = corr_results[trait] - result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p - } - - final_results.append(result_dict) - - # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True) - - return final_results - -def calculate_results(this_trait, this_dataset, target_dataset, corr_params): - corr_results = {} - - target_dataset.get_trait_data() - - if corr_params['type'] == "tissue": - trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) - sorted_results = collections.OrderedDict(sorted(corr_results.items(), - key=lambda t: -abs(t[1][1]))) - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" - trait_geneid_dict = this_dataset.retrieve_genes("GeneId") - corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) - sorted_results = collections.OrderedDict(sorted(corr_results.items(), - key=lambda t: -abs(t[1][1]))) - else: - for target_trait, target_vals in target_dataset.trait_data.iteritems(): - result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) - if result is not None: - corr_results[target_trait] = result - - sorted_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) - - return sorted_results - -def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) - - if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] - - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values()) - - tissue_corr_data = {} - for trait, symbol in trait_symbol_dict.iteritems(): - if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = 
corr_result_tissue_vals_dict[symbol.lower()] - - result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - corr_params['method']) - - tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] - - return tissue_corr_data - -def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): - input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) - - lit_corr_data = {} - for trait, gene_id in trait_geneid_dict.iteritems(): - mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) - - if mouse_gene_id and str(mouse_gene_id).find(";") == -1: - result = g.db.execute( - """SELECT value - FROM LCorrRamin3 - WHERE GeneId1='%s' and - GeneId2='%s' - """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) - ).fetchone() - if not result: - result = g.db.execute("""SELECT value - FROM LCorrRamin3 - WHERE GeneId2='%s' and - GeneId1='%s' - """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) - ).fetchone() - if result: - lit_corr = result.value - lit_corr_data[trait] = [gene_id, lit_corr] - else: - lit_corr_data[trait] = [gene_id, 0] - else: - lit_corr_data[trait] = [gene_id, 0] - - return lit_corr_data - -def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): - """ - Calculates the sample r (or rho) and p-value - - Given a primary trait and a target trait's sample values, - calculates either the pearson r or spearman rho and the p-value - using the corresponding scipy functions. - """ - - this_trait_vals = [] - shared_target_vals = [] - for i, sample in enumerate(target_dataset.group.samplelist): - if sample in this_trait.data: - this_sample_value = this_trait.data[sample].value - target_sample_value = target_vals[i] - this_trait_vals.append(this_sample_value) - shared_target_vals.append(target_sample_value) - - this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) - - if type == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) - - if num_overlap > 5: - if scipy.isnan(sample_r): - return None - else: - return [sample_r, sample_p, num_overlap] - -def convert_to_mouse_gene_id(species=None, gene_id=None): - """If the species is rat or human, translate the gene_id to the mouse geneid - - If there is no input gene_id or there's no corresponding mouse gene_id, return None - - """ - if not gene_id: - return None - - mouse_gene_id = None - - if species == 'mouse': - mouse_gene_id = gene_id - - elif species == 'rat': - - query = """SELECT mouse - FROM GeneIDXRef - WHERE rat='%s'""" % escape(gene_id) - - result = g.db.execute(query).fetchone() - if result != None: - mouse_gene_id = result.mouse - - elif species == 'human': - - query = """SELECT mouse - FROM GeneIDXRef - WHERE human='%s'""" % escape(gene_id) - - result = g.db.execute(query).fetchone() - if result != None: - mouse_gene_id = result.mouse - - return mouse_gene_id - -def init_corr_params(start_vars): - method = "pearson" - if 'method' in start_vars: - method = start_vars['method'] - - type = "sample" - if 'type' in start_vars: - type = start_vars['type'] - - return_count = 500 - if 'return_count' in start_vars: - assert(start_vars['return_count'].isdigit()) - return_count = 
int(start_vars['return_count']) - - corr_params = { - 'method' : method, - 'type' : type, - 'return_count' : return_count - } - +from __future__ import absolute_import, division, print_function + +import collections + +import scipy + +from MySQLdb import escape_string as escape + +from flask import g + +from base import data_set +from base.trait import create_trait, retrieve_sample_data + +from wqflask.correlation.show_corr_results import generate_corr_json +from wqflask.correlation import correlation_functions + +from utility import webqtlUtil, helper_functions, corr_result_helpers +from utility.benchmark import Bench + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def do_correlation(start_vars): + assert('db' in start_vars) + assert('target_db' in start_vars) + assert('trait_id' in start_vars) + + this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) + target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) + this_trait = create_trait(dataset = this_dataset, name = start_vars['trait_id']) + this_trait = retrieve_sample_data(this_trait, this_dataset) + + corr_params = init_corr_params(start_vars) + + corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) + + final_results = [] + for _trait_counter, trait in enumerate(corr_results.keys()[:corr_params['return_count']]): + if corr_params['type'] == "tissue": + [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "#_strains" : num_overlap, + "p_value" : sample_p, + "symbol" : symbol + } + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": + [gene_id, sample_r] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "gene_id" : gene_id + } + else: + [sample_r, sample_p, num_overlap] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "#_strains" : num_overlap, + "p_value" : sample_p + } + + final_results.append(result_dict) + + # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True) + + return final_results + +def calculate_results(this_trait, this_dataset, target_dataset, corr_params): + corr_results = {} + + target_dataset.get_trait_data() + + if corr_params['type'] == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + sorted_results = collections.OrderedDict(sorted(corr_results.items(), + key=lambda t: -abs(t[1][1]))) + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" + trait_geneid_dict = this_dataset.retrieve_genes("GeneId") + corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + sorted_results = collections.OrderedDict(sorted(corr_results.items(), + key=lambda t: -abs(t[1][1]))) + else: + for target_trait, target_vals in target_dataset.trait_data.iteritems(): + result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + if result is not None: + corr_results[target_trait] = result + + sorted_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) + + return sorted_results 
+ +def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): + #Gets tissue expression values for the primary trait + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) + + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values()) + + tissue_corr_data = {} + for trait, symbol in trait_symbol_dict.iteritems(): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + + result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, + this_trait_tissue_values, + corr_params['method']) + + tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + + return tissue_corr_data + +def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): + input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + + lit_corr_data = {} + for trait, gene_id in trait_geneid_dict.iteritems(): + mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + + if mouse_gene_id and str(mouse_gene_id).find(";") == -1: + result = g.db.execute( + """SELECT value + FROM LCorrRamin3 + WHERE GeneId1='%s' and + GeneId2='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if not result: + result = g.db.execute("""SELECT value + FROM LCorrRamin3 + WHERE GeneId2='%s' and + GeneId1='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if result: + lit_corr = result.value + lit_corr_data[trait] = [gene_id, lit_corr] + else: + lit_corr_data[trait] = [gene_id, 0] + else: + lit_corr_data[trait] = [gene_id, 0] + + return lit_corr_data + +def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): + """ + Calculates the sample r (or rho) and p-value + + Given a primary trait and a target trait's sample values, + calculates either the pearson r or spearman rho and the p-value + using the corresponding scipy functions. 
+ """ + + this_trait_vals = [] + shared_target_vals = [] + for i, sample in enumerate(target_dataset.group.samplelist): + if sample in this_trait.data: + this_sample_value = this_trait.data[sample].value + target_sample_value = target_vals[i] + this_trait_vals.append(this_sample_value) + shared_target_vals.append(target_sample_value) + + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + + if type == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + + if num_overlap > 5: + if scipy.isnan(sample_r): + return None + else: + return [sample_r, sample_p, num_overlap] + +def convert_to_mouse_gene_id(species=None, gene_id=None): + """If the species is rat or human, translate the gene_id to the mouse geneid + + If there is no input gene_id or there's no corresponding mouse gene_id, return None + + """ + if not gene_id: + return None + + mouse_gene_id = None + + if species == 'mouse': + mouse_gene_id = gene_id + + elif species == 'rat': + + query = """SELECT mouse + FROM GeneIDXRef + WHERE rat='%s'""" % escape(gene_id) + + result = g.db.execute(query).fetchone() + if result != None: + mouse_gene_id = result.mouse + + elif species == 'human': + + query = """SELECT mouse + FROM GeneIDXRef + WHERE human='%s'""" % escape(gene_id) + + result = g.db.execute(query).fetchone() + if result != None: + mouse_gene_id = result.mouse + + return mouse_gene_id + +def init_corr_params(start_vars): + method = "pearson" + if 'method' in start_vars: + method = start_vars['method'] + + type = "sample" + if 'type' in start_vars: + type = start_vars['type'] + + return_count = 500 + if 'return_count' in start_vars: + assert(start_vars['return_count'].isdigit()) + return_count = int(start_vars['return_count']) + + corr_params = { + 'method' : method, + 'type' : type, + 'return_count' : return_count + } + return corr_params \ No newline at end of file diff --git a/wqflask/wqflask/api/gen_menu.py b/wqflask/wqflask/api/gen_menu.py index c7bcb65d..bdcc3bf7 100644 --- a/wqflask/wqflask/api/gen_menu.py +++ b/wqflask/wqflask/api/gen_menu.py @@ -126,9 +126,7 @@ def build_types(species, group): InbredSet.Name = '{1}' AND ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND - ProbeSetFreeze.public > 0 AND - ProbeSetFreeze.confidentiality < 1 + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY Tissue.Name""".format(species, group) results = [] @@ -194,9 +192,7 @@ def build_datasets(species, group, type_name): FROM InfoFiles, GenoFreeze, InbredSet WHERE InbredSet.Name = '{}' AND GenoFreeze.InbredSetId = InbredSet.Id AND - InfoFiles.InfoPageName = GenoFreeze.ShortName AND - GenoFreeze.public > 0 AND - GenoFreeze.confidentiality < 1 + InfoFiles.InfoPageName = GenoFreeze.ShortName ORDER BY GenoFreeze.CreateTime DESC""".format(group)).fetchone() if results != None: @@ -214,8 +210,7 @@ def build_datasets(species, group, type_name): Species.Id = InbredSet.SpeciesId AND InbredSet.Name = '{1}' AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '{2}' AND - ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 + ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id ORDER BY ProbeSetFreeze.CreateTime DESC""".format(species, 
group, type_name)).fetchall() datasets = [] diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index d830cefc..92c27c9b 100644 --- a/wqflask/wqflask/api/mapping.py +++ b/wqflask/wqflask/api/mapping.py @@ -4,7 +4,7 @@ import string from base import data_set from base import webqtlConfig -from base.trait import GeneralTrait, retrieve_sample_data +from base.trait import create_trait, retrieve_sample_data from utility import helper_functions from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping @@ -18,7 +18,7 @@ def do_mapping_for_api(start_vars): dataset = data_set.create_dataset(dataset_name = start_vars['db']) dataset.group.get_markers() - this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id']) + this_trait = create_trait(dataset = dataset, name = start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, dataset) samples = [] diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index b22e0004..4fb8e69b 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -14,9 +14,6 @@ import urlparse import simplejson as json -import redis -Redis = redis.StrictRedis() - from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash, jsonify) @@ -30,8 +27,10 @@ from wqflask import model from utility import Bunch, Struct, hmac from utility.formatting import numify +from utility.redis_tools import get_redis_conn +Redis = get_redis_conn() -from base import trait +from base.trait import create_trait, retrieve_trait_info, jsonable from base.data_set import create_dataset import logging @@ -208,14 +207,14 @@ def view_collection(): if dataset_name == "Temp": group = name.split("_")[2] dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group) - trait_ob = trait.GeneralTrait(name=name, dataset=dataset) + trait_ob = create_trait(name=name, dataset=dataset) else: dataset = create_dataset(dataset_name) - trait_ob = trait.GeneralTrait(name=name, dataset=dataset) - trait_ob = trait.retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) + trait_ob = create_trait(name=name, dataset=dataset) + trait_ob = retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) trait_obs.append(trait_ob) - json_version.append(trait.jsonable(trait_ob)) + json_version.append(jsonable(trait_ob)) collection_info = dict(trait_obs=trait_obs, uc = uc) diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py index 21eb1493..5d74dc9d 100644 --- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py +++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py @@ -37,7 +37,7 @@ from pprint import pformat as pf import reaper -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers from db import webqtlDatabaseFunction @@ -108,7 +108,7 @@ class ComparisonBarChart(object): trait_name, dataset_name = trait_db.split(":") #print("dataset_name:", dataset_name) dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) self.trait_list.append((trait_ob, dataset_ob)) diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index dfb81c54..04ec427d 100644 --- 
a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -4,7 +4,7 @@ import math from flask import g -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import corr_result_helpers from scipy import stats @@ -20,9 +20,9 @@ class CorrScatterPlot(object): self.data_set_1 = data_set.create_dataset(params['dataset_1']) self.data_set_2 = data_set.create_dataset(params['dataset_2']) #self.data_set_3 = data_set.create_dataset(params['dataset_3']) - self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1) - self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2) - #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3) + self.trait_1 = create_trait(name=params['trait_1'], dataset=self.data_set_1) + self.trait_2 = create_trait(name=params['trait_2'], dataset=self.data_set_2) + #self.trait_3 = create_trait(name=params['trait_3'], dataset=self.data_set_3) samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index b099b83d..7eab7184 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -47,7 +47,7 @@ import reaper from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers, hmac from db import webqtlDatabaseFunction @@ -97,7 +97,7 @@ class CorrelationResults(object): if start_vars['dataset'] == "Temp": self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) self.trait_id = start_vars['trait_id'] - self.this_trait = GeneralTrait(dataset=self.dataset, + self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) else: @@ -199,7 +199,9 @@ class CorrelationResults(object): range_chr_as_int = order_id for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): - trait_object = GeneralTrait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + if not trait_object: + continue if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno": #ZS: Convert trait chromosome to an int for the location range option diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index b5c45d05..2b9467d1 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -43,14 +43,16 @@ from pprint import pformat as pf import reaper -import redis -Redis = redis.StrictRedis() +from utility.redis_tools import get_redis_conn +Redis = get_redis_conn() +THIRTY_DAYS = 60 * 60 * 24 * 30 from utility.THCell import THCell from utility.TDCell import TDCell from base.trait import GeneralTrait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers + from db import webqtlDatabaseFunction import utility.webqtlUtil #this is for parallel computing only. 
from wqflask.correlation import correlation_functions @@ -204,20 +206,6 @@ class CorrelationMatrix(object): samples = self.all_sample_list, sample_data = self.sample_data,) # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) - - def get_trait_db_obs(self, trait_db_list): - - self.trait_list = [] - for i, trait_db in enumerate(trait_db_list): - if i == (len(trait_db_list) - 1): - break - trait_name, dataset_name = trait_db.split(":") - #print("dataset_name:", dataset_name) - dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, - name=trait_name, - cellid=None) - self.trait_list.append((trait_ob, dataset_ob)) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') @@ -257,7 +245,7 @@ class CorrelationMatrix(object): this_vals_string += "x " this_vals_string = this_vals_string[:-1] - Redis.set(trait_id, this_vals_string) + Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) self.pca_trait_ids.append(trait_id) return pca diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index 4415b86a..35067036 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -17,7 +17,7 @@ import csv import itertools from base import data_set -from base import trait as TRAIT +from base.trait import create_trait, retrieve_sample_data from utility import helper_functions from utility.tools import locate, GN2_BRANCH_URL @@ -122,8 +122,8 @@ class CTL(object): logger.debug("retrieving data for", trait) if trait != "": ts = trait.split(':') - gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1]) - gt = TRAIT.retrieve_sample_data(gt, dataset, individuals) + gt = create_trait(name = ts[0], dataset_name = ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) for ind in individuals: if ind in gt.data.keys(): traits.append(gt.data[ind].value) @@ -180,8 +180,8 @@ class CTL(object): logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console tsS = significant[0][x].split(':') # Source tsT = significant[2][x].split(':') # Target - gtS = TRAIT.GeneralTrait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB - gtT = TRAIT.GeneralTrait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB + gtS = create_trait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB + gtT = create_trait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB self.addNode(gtS) self.addNode(gtT) self.addEdge(gtS, gtT, significant, x) diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index b0ca5ced..1e15d28f 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -34,10 +34,7 @@ class DoSearch(object): self.search_type = search_type if self.dataset: - logger.debug("self.dataset is boo: ", type(self.dataset), pf(self.dataset)) - logger.debug("self.dataset.group is: ", pf(self.dataset.group)) #Get group information for dataset and the species id - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) def execute(self, query): @@ -54,10 +51,6 @@ class DoSearch(object): return keyword - #def escape(self, stringy): - # """Shorter name than self.db_conn.escape_string""" - # return escape(str(stringy)) - def mescape(self, *items): """Multiple escape""" escaped = [escape(str(item)) for item in items] @@ -71,8 +64,6 @@ class DoSearch(object): @classmethod def get_search(cls, 
search_type): - logger.debug("search_types are:", pf(cls.search_types)) - search_type_string = search_type['dataset_type'] if 'key' in search_type and search_type['key'] != None: search_type_string += '_' + search_type['key'] @@ -648,7 +639,7 @@ class CisTransLrsSearch(DoSearch): escape(self.dataset.type), chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 04e3d578..c65a1415 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -4,7 +4,7 @@ import json from flask import Flask, g from base.data_set import create_dataset -from base.trait import GeneralTrait +from base.trait import create_trait from db import webqtlDatabaseFunction from base import webqtlConfig @@ -96,7 +96,9 @@ class GSearch(object): #dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) #trait_id = line[4] #with Bench("Building trait object"): - trait_ob = GeneralTrait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + if not trait_ob: + continue max_lrs_text = "N/A" if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": max_lrs_text = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) @@ -210,13 +212,12 @@ class GSearch(object): if line[11] != "" and line[11] != None: this_trait['additive'] = '%.3f' % line[11] - #dataset = create_dataset(line[2], "Publish") - #trait_id = line[3] - #this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) this_trait['max_lrs_text'] = "N/A" + trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + if not trait_ob: + continue if this_trait['dataset'] == this_trait['group'] + "Publish": try: - trait_ob = GeneralTrait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) except: diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index a648667b..74fa4329 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -246,6 +246,12 @@ class DisplayMappingResults(object): if 'output_files' in start_vars: self.output_files = ",".join(start_vars['output_files']) + self.categorical_vars = "" + self.perm_strata = "" + if 'perm_strata' in start_vars.keys() and 'categorical_vars' in start_vars.keys(): + self.categorical_vars = start_vars['categorical_vars'] + self.perm_strata = start_vars['perm_strata'] + self.selectedChr = int(start_vars['selected_chr']) self.strainlist = start_vars['samples'] diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py 
b/wqflask/wqflask/marker_regression/gemma_mapping.py index e2b15c26..88d27517 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -1,7 +1,7 @@ import os, math, string, random, json from base import webqtlConfig -from base.trait import GeneralTrait +from base.trait import create_trait from base.data_set import create_dataset from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE @@ -129,7 +129,7 @@ def gen_covariates_file(this_dataset, covariates, samples): this_covariate_data = [] trait_name = covariate.split(":")[0] dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index e4a4d127..c5590a85 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -6,7 +6,7 @@ import json from flask import g from base.webqtlConfig import TMPDIR -from base.trait import GeneralTrait +from base.trait import create_trait from base.data_set import create_dataset from utility import webqtlUtil from utility.tools import locate, TEMPDIR @@ -86,7 +86,6 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype cross_object = add_names(cross_object, names_string, "the_names") # Add the phenotype logger.info("Added pheno and names"); - # Scan for QTLs marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers logger.info("Marker covars done"); if cofactors != "": @@ -115,6 +114,7 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec else: if do_control == "true" or cofactors != "": logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) + ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")') else: logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) @@ -295,7 +295,7 @@ def add_cofactors(cross, this_dataset, covariates, samples): covar_as_string = "c(" trait_name = covariate.split(":")[0] dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) @@ -321,27 +321,27 @@ def add_cofactors(cross, this_dataset, covariates, samples): datatype = get_trait_data_type(covariate) logger.info("Covariate: " + covariate + " is of type: " + datatype); if(datatype == "categorical"): # Cat variable - logger.info("call of add_categorical_covar"); - cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross - logger.info("add_categorical_covar returned"); - for z, col_name in enumerate(col_names): # Go through the additional covar names + logger.info("call of add_categorical_covar"); + cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross + logger.info("add_categorical_covar returned"); + for z, col_name in enumerate(col_names): # Go through the additional covar names + if i < (len(covariate_list) - 1): + covar_name_string += '"' + col_name + '", ' + else: 
+ if(z < (len(col_names) -1)): + covar_name_string += '"' + col_name + '", ' + else: + covar_name_string += '"' + col_name + '"' + + logger.info("covar_name_string:" + covar_name_string) + else: + col_name = "covar_" + str(i) + cross = add_phenotype(cross, covar_as_string, col_name) if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - if(z < (len(col_names) -1)): covar_name_string += '"' + col_name + '", ' - else: + else: covar_name_string += '"' + col_name + '"' - logger.info("covar_name_string:" + covar_name_string); - else: - col_name = "covar_" + str(i) - cross = add_phenotype(cross, covar_as_string, col_name) - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - covar_name_string += '"' + col_name + '"' - covar_name_string += ")" logger.info("covar_name_string:" + covar_name_string); covars_ob = pull_var("trait_covars", cross, covar_name_string) @@ -350,9 +350,13 @@ def add_cofactors(cross, this_dataset, covariates, samples): def create_marker_covariates(control_marker, cross): ro.globalenv["the_cross"] = cross ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user - covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) - ro.r('covnames <- c(' + covariate_names + ')') + userinput_sanitized = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + logger.debug(userinput_sanitized) + if len(userinput_sanitized) > 0: + covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized) + ro.r('covnames <- c(' + covariate_names + ')') + else: + ro.r('covnames <- c()') ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') ro.r('covnames <- covnames[covInGeno]') ro.r("cat('covnames (purged): ', covnames,'\n')") @@ -404,16 +408,4 @@ def process_rqtl_results(result, species_name): # TODO: how to make this marker['lod_score'] = output[i][2] qtl_results.append(marker) - return qtl_results - -def get_trait_data_type(trait_db_string): - # Get a trait's type (numeric, categorical, etc) from the DB - the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" - results_json = g.db.execute(the_query).fetchone() - - results_ob = json.loads(results_json[0]) - - if trait_db_string in results_ob: - return results_ob[trait_db_string] - else: - return "numeric" + return qtl_results \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 5f7710ab..0711b852 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -161,7 +161,7 @@ class RunMapping(object): self.num_perm = 0 self.perm_output = [] self.bootstrap_results = [] - self.covariates = start_vars['covariates'] if "covariates" in start_vars else None + self.covariates = start_vars['covariates'] if "covariates" in start_vars else "" #ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 @@ -467,6 +467,7 @@ class RunMapping(object): #mapping_scale = self.mapping_scale, #chromosomes = chromosome_mb_lengths, #qtl_results = self.qtl_results, + categorical_vars = self.categorical_vars, chr_lengths = chr_lengths, num_perm = self.num_perm, perm_results = self.perm_output, diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index 152e4168..f41f3017 
--- a/wqflask/wqflask/network_graph/network_graph.py
+++ b/wqflask/wqflask/network_graph/network_graph.py
@@ -44,7 +44,7 @@ import reaper
 from utility.THCell import THCell
 from utility.TDCell import TDCell

-from base.trait import GeneralTrait
+from base.trait import create_trait
 from base import data_set
 from utility import webqtlUtil, helper_functions, corr_result_helpers
 from utility.tools import GN2_BRANCH_URL
@@ -217,7 +217,7 @@ class NetworkGraph(object):
                     break
             trait_name, dataset_name = trait_db.split(":")
             dataset_ob = data_set.create_dataset(dataset_name)
-            trait_ob = GeneralTrait(dataset=dataset_ob,
+            trait_ob = create_trait(dataset=dataset_ob,
                                     name=trait_name,
                                     cellid=None)
             self.trait_list.append((trait_ob, dataset_ob))
\ No newline at end of file
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 8f702d58..de4b01eb 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -1,15 +1,9 @@
-# from __future__ import absolute_import, print_function, division
+from __future__ import absolute_import, print_function, division

-
-import os
-import cPickle
 import re
 import uuid
 from math import *
 import time
-import math
-import datetime
-import collections
 import re

 import requests
@@ -18,18 +12,16 @@ from pprint import pformat as pf
 import json

 from base.data_set import create_dataset
-from base import trait
+from base.trait import create_trait
 from wqflask import parser
 from wqflask import do_search
-from utility import webqtlUtil,tools
 from db import webqtlDatabaseFunction

-from flask import render_template, Flask, g
+from flask import Flask, g

-from utility import formatting
-from utility import hmac
+from utility import hmac, helper_functions
 from utility.tools import GN2_BASE_URL
-from utility.type_checking import is_float, is_int, is_str, get_float, get_int, get_string
+from utility.type_checking import is_str

 from utility.logger import getLogger
 logger = getLogger(__name__ )
@@ -86,7 +78,7 @@ views.py).
         try:
             self.search()
         except:
-            self.search_term_exists = False
+            self.search_term_exists = False

         if self.search_term_exists:
             self.gen_search_result()
@@ -113,50 +105,49 @@ views.py).

             trait_dict = {}
             trait_id = result[0]
-            trait_dict['index'] = index + 1
-            this_trait = trait.GeneralTrait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
-            trait_dict['name'] = this_trait.name
-            if this_trait.dataset.type == "Publish":
-                trait_dict['display_name'] = this_trait.display_name
-            else:
-                trait_dict['display_name'] = this_trait.name
-            trait_dict['dataset'] = this_trait.dataset.name
-            trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name))
-            if this_trait.dataset.type == "ProbeSet":
-                trait_dict['symbol'] = this_trait.symbol
-                trait_dict['description'] = this_trait.description_display.decode('utf-8', 'replace')
-                trait_dict['location'] = this_trait.location_repr
-                trait_dict['mean'] = "N/A"
-                trait_dict['additive'] = "N/A"
-                if this_trait.mean != "" and this_trait.mean != None:
-                    trait_dict['mean'] = '%.3f' % this_trait.mean
-                trait_dict['lrs_score'] = this_trait.LRS_score_repr
-                trait_dict['lrs_location'] = this_trait.LRS_location_repr
-                if this_trait.additive != "":
-                    trait_dict['additive'] = '%.3f' % this_trait.additive
-            elif this_trait.dataset.type == "Geno":
-                trait_dict['location'] = this_trait.location_repr
-            elif this_trait.dataset.type == "Publish":
-                trait_dict['description'] = this_trait.description_display
-                trait_dict['authors'] = this_trait.authors
-                trait_dict['pubmed_id'] = "N/A"
-                if this_trait.pubmed_id:
-                    trait_dict['pubmed_id'] = this_trait.pubmed_id
-                    trait_dict['pubmed_link'] = this_trait.pubmed_link
-                    trait_dict['pubmed_text'] = this_trait.pubmed_text
-                trait_dict['mean'] = "N/A"
-                if this_trait.mean != "" and this_trait.mean != None:
-                    trait_dict['mean'] = '%.3f' % this_trait.mean
-                trait_dict['lrs_score'] = this_trait.LRS_score_repr
-                trait_dict['lrs_location'] = this_trait.LRS_location_repr
-                trait_dict['additive'] = "N/A"
-                if this_trait.additive != "":
-                    trait_dict['additive'] = '%.3f' % this_trait.additive
-            trait_list.append(trait_dict)
-            #json_trait_list.append(trait.jsonable_table_row(this_trait, self.dataset.name, index + 1))
+            this_trait = create_trait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
+            if this_trait:
+                trait_dict['index'] = index + 1
+                trait_dict['name'] = this_trait.name
+                if this_trait.dataset.type == "Publish":
+                    trait_dict['display_name'] = this_trait.display_name
+                else:
+                    trait_dict['display_name'] = this_trait.name
+                trait_dict['dataset'] = this_trait.dataset.name
+                trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name))
+                if this_trait.dataset.type == "ProbeSet":
+                    trait_dict['symbol'] = this_trait.symbol
+                    trait_dict['description'] = this_trait.description_display.decode('utf-8', 'replace')
+                    trait_dict['location'] = this_trait.location_repr
+                    trait_dict['mean'] = "N/A"
+                    trait_dict['additive'] = "N/A"
+                    if this_trait.mean != "" and this_trait.mean != None:
+                        trait_dict['mean'] = '%.3f' % this_trait.mean
+                    trait_dict['lrs_score'] = this_trait.LRS_score_repr
+                    trait_dict['lrs_location'] = this_trait.LRS_location_repr
+                    if this_trait.additive != "":
+                        trait_dict['additive'] = '%.3f' % this_trait.additive
+                elif this_trait.dataset.type == "Geno":
+                    trait_dict['location'] = this_trait.location_repr
+                elif this_trait.dataset.type == "Publish":
+                    trait_dict['description'] = this_trait.description_display
+                    trait_dict['authors'] = this_trait.authors
+                    trait_dict['pubmed_id'] = "N/A"
+                    if this_trait.pubmed_id:
+                        trait_dict['pubmed_id'] = this_trait.pubmed_id
+                        trait_dict['pubmed_link'] = this_trait.pubmed_link
+                        trait_dict['pubmed_text'] = this_trait.pubmed_text
+                    trait_dict['mean'] = "N/A"
+                    if this_trait.mean != "" and this_trait.mean != None:
+                        trait_dict['mean'] = '%.3f' % this_trait.mean
+                    trait_dict['lrs_score'] = this_trait.LRS_score_repr
+                    trait_dict['lrs_location'] = this_trait.LRS_location_repr
+                    trait_dict['additive'] = "N/A"
+                    if this_trait.additive != "":
+                        trait_dict['additive'] = '%.3f' % this_trait.additive
+                trait_list.append(trait_dict)

         self.trait_list = json.dumps(trait_list)
-        #self.json_trait_list = json.dumps(json_trait_list)

     def search(self):
         """
@@ -234,7 +225,6 @@ views.py).
                 self.header_fields = the_search.header_fields

     def get_search_ob(self, a_search):
-        logger.debug("[kodak] item is:", pf(a_search))
         search_term = a_search['search_term']
         search_operator = a_search['separator']
         search_type = {}
@@ -243,12 +233,10 @@ views.py).
             search_type['key'] = a_search['key'].upper()
         else:
             search_type['key'] = None
-        logger.debug("search_type is:", pf(search_type))

         search_ob = do_search.DoSearch.get_search(search_type)
         if search_ob:
             search_class = getattr(do_search, search_ob)
-            logger.debug("search_class is: ", pf(search_class))
             the_search = search_class(search_term,
                                       search_operator,
                                       self.dataset,
diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py
index 107f87c6..253c887b 100644
--- a/wqflask/wqflask/show_trait/export_trait_data.py
+++ b/wqflask/wqflask/show_trait/export_trait_data.py
@@ -4,7 +4,7 @@ import simplejson as json

 from pprint import pformat as pf

-from base.trait import GeneralTrait
+from base.trait import create_trait
 from base import data_set

 def export_sample_table(targs):
@@ -26,7 +26,7 @@ def export_sample_table(targs):

 def get_export_metadata(trait_id, dataset_name):
     dataset = data_set.create_dataset(dataset_name)
-    this_trait = GeneralTrait(dataset=dataset,
+    this_trait = create_trait(dataset=dataset,
                               name=trait_id,
                               cellid=None,
                               get_qtl_info=False)
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 29b2f77e..c77e247f 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -10,9 +10,6 @@ import json as json

 from collections import OrderedDict

-import redis
-Redis = redis.StrictRedis()
-
 import numpy as np
 import scipy.stats as ss

@@ -21,11 +18,15 @@ from flask import Flask, g
 from base import webqtlConfig
 from base import webqtlCaseData
 from wqflask.show_trait.SampleList import SampleList
-from utility import webqtlUtil, Plot, Bunch, helper_functions
-from utility.tools import locate_ignore_error
-from base.trait import GeneralTrait
+from base.trait import create_trait
 from base import data_set
 from db import webqtlDatabaseFunction
+from utility import webqtlUtil, Plot, Bunch, helper_functions
+from utility.authentication_tools import check_owner
+from utility.tools import locate_ignore_error
+from utility.redis_tools import get_redis_conn, get_resource_id
+Redis = get_redis_conn()
+ONE_YEAR = 60 * 60 * 24 * 365

 from pprint import pformat as pf

@@ -55,9 +56,9 @@ class ShowTrait(object):
             self.temp_group = kw['group']
             self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
             # Put values in Redis so they can be looked up later if added to a collection
-            Redis.set(self.trait_id, kw['trait_paste'])
+            Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR)
             self.trait_vals = kw['trait_paste'].split()
-            self.this_trait = GeneralTrait(dataset=self.dataset,
+            self.this_trait = create_trait(dataset=self.dataset,
                                            name=self.trait_id,
                                            cellid=None)
         else:
@@ -66,11 +67,13 @@ class ShowTrait(object):
             self.temp_species = self.trait_id.split("_")[1]
             self.temp_group = self.trait_id.split("_")[2]
             self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
-            self.this_trait = GeneralTrait(dataset=self.dataset,
+            self.this_trait = create_trait(dataset=self.dataset,
                                            name=self.trait_id,
                                            cellid=None)
             self.trait_vals = Redis.get(self.trait_id).split()

+        self.resource_id = check_owner(self.dataset, self.trait_id)
+
         #ZS: Get verify/rna-seq link URLs
         try:
             blatsequence = self.this_trait.sequence
diff --git a/wqflask/wqflask/templates/admin/group_manager.html b/wqflask/wqflask/templates/admin/group_manager.html
index ac5c1350..b7df1aad 100644
--- a/wqflask/wqflask/templates/admin/group_manager.html
+++ b/wqflask/wqflask/templates/admin/group_manager.html
@@ -2,17 +2,25 @@
 {% block title %}Group Manager{% endblock %}
 {% block content %}
-    {{ header("List of groups", "" )}}
-