diff options
| author | zsloan | 2020-06-04 14:23:30 -0500 | 
|---|---|---|
| committer | zsloan | 2020-06-04 14:23:30 -0500 | 
| commit | 1a663f987bf3a640d21c2c89402318d5433efd9e (patch) | |
| tree | 07314f422059ce6e502feb9827f574af7512e73d /wqflask | |
| parent | c562bd7cd68735ded82d39868cf1af36c35a7920 (diff) | |
| download | genenetwork2-1a663f987bf3a640d21c2c89402318d5433efd9e.tar.gz | |
Really should have split this into many more commits:
- Now use proxy to pull trait data and hide traits/results that the user doesn't have view permission for
- Created a factory method for creating trait objects so that it can return None when the user doesn't have view permissions (this is why such a large number of files are changed)
- Added metadata to permutation export
- Added current group management code
- Added fixed password verification e-mail code
Diffstat (limited to 'wqflask')
30 files changed, 637 insertions, 617 deletions
| diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 1b7cb23c..b133bf21 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -11,6 +11,7 @@ from base.data_set import create_dataset from db import webqtlDatabaseFunction from utility import webqtlUtil from utility import hmac +from utility.authentication_tools import check_resource_availability from utility.tools import GN2_BASE_URL from utility.redis_tools import get_redis_conn Redis = get_redis_conn() @@ -21,11 +22,33 @@ import simplejson as json from MySQLdb import escape_string as escape from pprint import pformat as pf -from flask import Flask, g, request, url_for +from flask import Flask, g, request, url_for, redirect from utility.logger import getLogger logger = getLogger(__name__ ) +def create_trait(**kw): + assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + + permitted = True + if kw.get('name'): + if kw.get('dataset_name'): + if kw.get('dataset_name') != "Temp": + dataset = create_dataset(kw.get('dataset_name')) + else: + dataset = kw.get('dataset') + + if kw.get('dataset_name') != "Temp": + if dataset.type == 'Publish': + permitted = check_resource_availability(dataset, kw.get('name')) + else: + permitted = check_resource_availability(dataset) + + if permitted: + return GeneralTrait(**kw) + else: + return None + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -50,6 +73,7 @@ class GeneralTrait(object): self.haveinfo = kw.get('haveinfo', False) self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet self.data = kw.get('data', {}) + self.view = True # Sets defaults self.locus = None @@ -77,6 +101,7 @@ class GeneralTrait(object): # So we could add a simple if statement to short-circuit this if necessary if self.dataset.type != "Temp": self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info) + if get_sample_info != False: self = retrieve_sample_data(self, 
self.dataset) @@ -212,26 +237,28 @@ def get_sample_data(): trait = params['trait'] dataset = params['dataset'] - trait_ob = GeneralTrait(name=trait, dataset_name=dataset) - - trait_dict = {} - trait_dict['name'] = trait - trait_dict['db'] = dataset - trait_dict['type'] = trait_ob.dataset.type - trait_dict['group'] = trait_ob.dataset.group.name - trait_dict['tissue'] = trait_ob.dataset.tissue - trait_dict['species'] = trait_ob.dataset.group.species - trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) - trait_dict['description'] = trait_ob.description_display - if trait_ob.dataset.type == "ProbeSet": - trait_dict['symbol'] = trait_ob.symbol - trait_dict['location'] = trait_ob.location_repr - elif trait_ob.dataset.type == "Publish": - if trait_ob.pubmed_id: - trait_dict['pubmed_link'] = trait_ob.pubmed_link - trait_dict['pubmed_text'] = trait_ob.pubmed_text - - return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + trait_ob = create_trait(name=trait, dataset_name=dataset) + if trait_ob: + trait_dict = {} + trait_dict['name'] = trait + trait_dict['db'] = dataset + trait_dict['type'] = trait_ob.dataset.type + trait_dict['group'] = trait_ob.dataset.group.name + trait_dict['tissue'] = trait_ob.dataset.tissue + trait_dict['species'] = trait_ob.dataset.group.species + trait_dict['url'] = url_for('show_trait_page', trait_id = trait, dataset = dataset) + trait_dict['description'] = trait_ob.description_display + if trait_ob.dataset.type == "ProbeSet": + trait_dict['symbol'] = trait_ob.symbol + trait_dict['location'] = trait_ob.location_repr + elif trait_ob.dataset.type == "Publish": + if trait_ob.pubmed_id: + trait_dict['pubmed_link'] = trait_ob.pubmed_link + trait_dict['pubmed_text'] = trait_ob.pubmed_text + + return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) + else: + return None def jsonable(trait): """Return a dict suitable for using as json @@ 
-350,91 +377,36 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): assert dataset, "Dataset doesn't exist" if dataset.type == 'Publish': - resource_id = hmac.data_hmac("{}:{}".format(dataset.id, trait.name)) - - the_url = "http://localhost:8080/run_action/?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) - trait_data = json.loads(requests.get("http://localhost:8080/run_action/?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id))) - - query = """ - SELECT - PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID, - Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description, - Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, PublishXRef.mean, - Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users, - Publication.Authors, Publication.Title, Publication.Abstract, - Publication.Journal, Publication.Volume, Publication.Pages, - Publication.Month, Publication.Year, PublishXRef.Sequence, - Phenotype.Units, PublishXRef.comments - FROM - PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet - WHERE - PublishXRef.Id = %s AND - Phenotype.Id = PublishXRef.PhenotypeId AND - Publication.Id = PublishXRef.PublicationId AND - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishXRef.InbredSetId = InbredSet.Id AND - PublishFreeze.Id = %s - """ % (trait.name, dataset.id) - - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - - - #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. 
+ resource_id = hmac.hmac_creation("{}:{}:{}".format('dataset-publish', dataset.id, trait.name)) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(resource_id, g.user_session.user_id) elif dataset.type == 'ProbeSet': - display_fields_string = ', ProbeSet.'.join(dataset.display_fields) - display_fields_string = 'ProbeSet.' + display_fields_string - query = """ - SELECT %s - FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef - WHERE - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSet.Name = '%s' - """ % (escape(display_fields_string), - escape(dataset.name), - escape(str(trait.name))) - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name - # to avoid the problem of same marker name from different species. - elif dataset.type == 'Geno': - display_fields_string = string.join(dataset.display_fields,',Geno.') - display_fields_string = 'Geno.' 
+ display_fields_string - query = """ - SELECT %s - FROM Geno, GenoFreeze, GenoXRef - WHERE - GenoXRef.GenoFreezeId = GenoFreeze.Id AND - GenoXRef.GenoId = Geno.Id AND - GenoFreeze.Name = '%s' AND - Geno.Name = '%s' - """ % (escape(display_fields_string), - escape(dataset.name), - escape(trait.name)) - logger.sql(query) - trait_info = g.db.execute(query).fetchone() - else: #Temp type - query = """SELECT %s FROM %s WHERE Name = %s""" - logger.sql(query) - trait_info = g.db.execute(query, - (string.join(dataset.display_fields,','), - dataset.type, trait.name)).fetchone() + resource_id = hmac.hmac_creation("{}:{}".format('dataset-probeset', dataset.id)) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name) + else: + resource_id = hmac.hmac_creation("{}:{}".format('dataset-geno', dataset.id)) + the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(resource_id, g.user_session.user_id, trait.name) + + try: + response = requests.get(the_url).content + if response.strip() == "no-access": + trait.view = False + return trait + except: + resource_info = get_resource_info(resource_id) + default_permissions = resource_info['default_mask']['data'] + if 'view' not in default_persmissions: + trait.view = False + return trait + + trait_info = json.loads(response) if trait_info: trait.haveinfo = True - #XZ: assign SQL query result to trait attributes. 
for i, field in enumerate(dataset.display_fields): holder = trait_info[i] - # if isinstance(trait_info[i], basestring): - # logger.debug("HOLDER:", holder) - # logger.debug("HOLDER2:", holder.decode(encoding='latin1')) - # holder = unicode(trait_info[i], "utf-8", "ignore") - if isinstance(trait_info[i], basestring): - holder = holder.encode('latin1') + #if isinstance(trait_info[i], basestring): + # holder = holder.encode('latin1') setattr(trait, field, holder) if dataset.type == 'Publish': @@ -453,13 +425,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if trait.confidential: trait.abbreviation = trait.pre_publication_abbreviation trait.description_display = trait.pre_publication_description - - #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( - # privilege=self.dataset.privilege, - # userName=self.dataset.userName, - # authorized_users=self.authorized_users): - # - # description = self.pre_publication_description else: trait.abbreviation = trait.post_publication_abbreviation if description: diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index e7c04fef..9ce809b6 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, print_function, division -from base.trait import GeneralTrait from base import data_set +from base.trait import create_trait from base.species import TheSpecies from utility import hmac @@ -11,7 +11,6 @@ from flask import Flask, g import logging logger = logging.getLogger(__name__ ) - def get_species_dataset_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" if "temp_trait" in start_vars.keys(): @@ -24,7 +23,7 @@ def get_species_dataset_trait(self, start_vars): logger.debug("After creating dataset") self.species = TheSpecies(dataset=self.dataset) logger.debug("After creating species") - self.this_trait = 
GeneralTrait(dataset=self.dataset, + self.this_trait = create_trait(dataset=self.dataset, name=start_vars['trait_id'], cellid=None, get_qtl_info=True) @@ -34,7 +33,6 @@ def get_species_dataset_trait(self, start_vars): #self.dataset.group.read_genotype_file() #self.genotype = self.dataset.group.genotype - def get_trait_db_obs(self, trait_db_list): if isinstance(trait_db_list, basestring): trait_db_list = trait_db_list.split(",") @@ -49,10 +47,11 @@ def get_trait_db_obs(self, trait_db_list): dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2]) else: dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) - self.trait_list.append((trait_ob, dataset_ob)) + if trait_ob: + self.trait_list.append((trait_ob, dataset_ob)) def get_species_groups(): diff --git a/wqflask/utility/redis_tools.py b/wqflask/utility/redis_tools.py index 15841032..0ad96879 100644 --- a/wqflask/utility/redis_tools.py +++ b/wqflask/utility/redis_tools.py @@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import import uuid import simplejson as json +import datetime import redis # used for collections @@ -96,15 +97,22 @@ def get_user_groups(user_id): for key in groups_list: group_ob = json.loads(groups_list[key]) group_admins = set(group_ob['admins']) - group_users = set(group_ob['users']) + group_members = set(group_ob['members']) if user_id in group_admins: admin_group_ids.append(group_ob['id']) - elif user_id in group_users: + elif user_id in group_members: user_group_ids.append(group_ob['id']) else: continue - return admin_group_ids, user_group_ids + admin_groups = [] + user_groups = [] + for the_id in admin_group_ids: + admin_groups.append(get_group_info(the_id)) + for the_id in user_group_ids: + user_groups.append(get_group_info(the_id)) + + return admin_groups, user_groups def 
get_group_info(group_id): group_json = Redis.hget("groups", group_id) @@ -114,18 +122,18 @@ def get_group_info(group_id): return group_info -def create_group(admin_member_ids, user_member_ids = [], group_name = ""): +def create_group(admin_user_ids, member_user_ids = [], group_name = "Default Group Name"): group_id = str(uuid.uuid4()) new_group = { "id" : group_id, - "admins": admin_member_ids, - "users" : user_member_ids, + "admins": admin_user_ids, + "members" : member_user_ids, "name" : group_name, "created_timestamp": datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p'), "changed_timestamp": datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') } - Redis.hset("groups", group_id, new_group) + Redis.hset("groups", group_id, json.dumps(new_group)) return new_group @@ -144,7 +152,7 @@ def add_users_to_group(user_id, group_id, user_emails = [], admins = False): #ZS if admins: group_users = set(group_info["admins"]) else: - group_users = set(group_info["users"]) + group_users = set(group_info["members"]) for email in user_emails: user_id = get_user_id("email_address", email) @@ -153,7 +161,7 @@ def add_users_to_group(user_id, group_id, user_emails = [], admins = False): #ZS if admins: group_info["admins"] = list(group_users) else: - group_info["users"] = list(group_users) + group_info["members"] = list(group_users) group_info["changed_timestamp"] = datetime.datetime.utcnow().strftime('%b %d %Y %I:%M%p') Redis.hset("groups", group_id, json.dumps(group_info)) @@ -161,7 +169,7 @@ def add_users_to_group(user_id, group_id, user_emails = [], admins = False): #ZS else: return None -def remove_users_from_group(user_id, users_to_remove_ids, group_id, user_type = "users"): #ZS: User type is because I assume admins can remove other admins +def remove_users_from_group(user_id, users_to_remove_ids, group_id, user_type = "members"): #ZS: User type is because I assume admins can remove other admins group_info = get_group_info(group_id) if user_id in group_info["admins"]: 
group_users = set(group_info[user_type]) @@ -174,6 +182,7 @@ def change_group_name(user_id, group_id, new_name): group_info = get_group_info(group_id) if user_id in group_info["admins"]: group_info["name"] = new_name + Redis.hset("groups", group_id, json.dumps(group_info)) return group_info else: return None @@ -182,22 +191,21 @@ def get_resources(): resource_list = Redis.hgetall("resources") return resource_list -def get_resource_id(dataset_type, dataset_id, trait_id = None, all_resources = None): - if not all_resources: - all_resources = get_resources() - - resource_list = [[key, json.loads(value)] for key, value in all_resources.items()] - - if not trait_id: - matched_resources = [resource[0] for resource in resource_list if resource[1]['data']['dataset'] == dataset_id] - else: - matched_resources = [resource[0] for resource in resource_list if resource[1]['data']['dataset'] == dataset_id and resource[1]['data']['trait'] == trait_id] - - if len(matched_resources): - return matched_resources[0] +def get_resource_id(dataset, trait_id=None): + if dataset.type == "Publish": + if trait_id: + resource_id = hmac.hmac_creation("{}:{}:{}".format('dataset-publish', dataset.id, trait_id)) + else: + return False + elif dataset.type == "ProbeSet": + resource_id = hmac.hmac_creation("{}:{}".format('dataset-probeset', dataset.id)) + elif dataset.type == "Geno": + resource_id = hmac.hmac_creation("{}:{}".format('dataset-geno', dataset.id)) else: return False + return resource_id + def get_resource_info(resource_id): resource_info = Redis.hget("resources", resource_id) return json.loads(resource_info) @@ -205,9 +213,9 @@ def get_resource_info(resource_id): def add_resource(resource_info): if 'trait' in resource_info['data']: - resource_id = hmac.data_hmac('{}:{}'.format(str(resource_info['data']['dataset']), str(resource_info['data']['trait']))) + resource_id = hmac.hmac_creation('{}:{}:{}'.format(str(resource_info['type']), str(resource_info['data']['dataset']), 
str(resource_info['data']['trait']))) else: - resource_id = hmac.data_hmac('{}'.format(str(resource_info['data']['dataset']))) + resource_id = hmac.hmac_creation('{}:{}'.format(str(resource_info['type']), str(resource_info['data']['dataset']))) Redis.hset("resources", resource_id, json.dumps(resource_info)) diff --git a/wqflask/wqflask/api/correlation.py b/wqflask/wqflask/api/correlation.py index 66eb94ac..7f5312c1 100644 --- a/wqflask/wqflask/api/correlation.py +++ b/wqflask/wqflask/api/correlation.py @@ -1,237 +1,237 @@ -from __future__ import absolute_import, division, print_function - -import collections - -import scipy - -from MySQLdb import escape_string as escape - -from flask import g - -from base import data_set -from base.trait import GeneralTrait, retrieve_sample_data - -from wqflask.correlation.show_corr_results import generate_corr_json -from wqflask.correlation import correlation_functions - -from utility import webqtlUtil, helper_functions, corr_result_helpers -from utility.benchmark import Bench - -import utility.logger -logger = utility.logger.getLogger(__name__ ) - -def do_correlation(start_vars): - assert('db' in start_vars) - assert('target_db' in start_vars) - assert('trait_id' in start_vars) - - this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) - target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) - this_trait = GeneralTrait(dataset = this_dataset, name = start_vars['trait_id']) - this_trait = retrieve_sample_data(this_trait, this_dataset) - - corr_params = init_corr_params(start_vars) - - corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) - #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) - - final_results = [] - for _trait_counter, trait in enumerate(corr_results.keys()[:corr_params['return_count']]): - if corr_params['type'] == "tissue": - [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] - 
result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p, - "symbol" : symbol - } - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": - [gene_id, sample_r] = corr_results[trait] - result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "gene_id" : gene_id - } - else: - [sample_r, sample_p, num_overlap] = corr_results[trait] - result_dict = { - "trait" : trait, - "sample_r" : sample_r, - "#_strains" : num_overlap, - "p_value" : sample_p - } - - final_results.append(result_dict) - - # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True) - - return final_results - -def calculate_results(this_trait, this_dataset, target_dataset, corr_params): - corr_results = {} - - target_dataset.get_trait_data() - - if corr_params['type'] == "tissue": - trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) - sorted_results = collections.OrderedDict(sorted(corr_results.items(), - key=lambda t: -abs(t[1][1]))) - elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" - trait_geneid_dict = this_dataset.retrieve_genes("GeneId") - corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) - sorted_results = collections.OrderedDict(sorted(corr_results.items(), - key=lambda t: -abs(t[1][1]))) - else: - for target_trait, target_vals in target_dataset.trait_data.iteritems(): - result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) - if result is not None: - corr_results[target_trait] = result - - sorted_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) - - return sorted_results - -def 
do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) - - if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] - - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values()) - - tissue_corr_data = {} - for trait, symbol in trait_symbol_dict.iteritems(): - if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] - - result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - corr_params['method']) - - tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] - - return tissue_corr_data - -def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): - input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) - - lit_corr_data = {} - for trait, gene_id in trait_geneid_dict.iteritems(): - mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) - - if mouse_gene_id and str(mouse_gene_id).find(";") == -1: - result = g.db.execute( - """SELECT value - FROM LCorrRamin3 - WHERE GeneId1='%s' and - GeneId2='%s' - """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) - ).fetchone() - if not result: - result = g.db.execute("""SELECT value - FROM LCorrRamin3 - WHERE GeneId2='%s' and - GeneId1='%s' - """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) - ).fetchone() - if result: - lit_corr = result.value - lit_corr_data[trait] = [gene_id, lit_corr] - else: - lit_corr_data[trait] = [gene_id, 
0] - else: - lit_corr_data[trait] = [gene_id, 0] - - return lit_corr_data - -def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, type): - """ - Calculates the sample r (or rho) and p-value - - Given a primary trait and a target trait's sample values, - calculates either the pearson r or spearman rho and the p-value - using the corresponding scipy functions. - """ - - this_trait_vals = [] - shared_target_vals = [] - for i, sample in enumerate(target_dataset.group.samplelist): - if sample in this_trait.data: - this_sample_value = this_trait.data[sample].value - target_sample_value = target_vals[i] - this_trait_vals.append(this_sample_value) - shared_target_vals.append(target_sample_value) - - this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) - - if type == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) - else: - sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) - - if num_overlap > 5: - if scipy.isnan(sample_r): - return None - else: - return [sample_r, sample_p, num_overlap] - -def convert_to_mouse_gene_id(species=None, gene_id=None): - """If the species is rat or human, translate the gene_id to the mouse geneid - - If there is no input gene_id or there's no corresponding mouse gene_id, return None - - """ - if not gene_id: - return None - - mouse_gene_id = None - - if species == 'mouse': - mouse_gene_id = gene_id - - elif species == 'rat': - - query = """SELECT mouse - FROM GeneIDXRef - WHERE rat='%s'""" % escape(gene_id) - - result = g.db.execute(query).fetchone() - if result != None: - mouse_gene_id = result.mouse - - elif species == 'human': - - query = """SELECT mouse - FROM GeneIDXRef - WHERE human='%s'""" % escape(gene_id) - - result = g.db.execute(query).fetchone() - if result != None: - mouse_gene_id = result.mouse - - return mouse_gene_id - -def init_corr_params(start_vars): - 
method = "pearson" - if 'method' in start_vars: - method = start_vars['method'] - - type = "sample" - if 'type' in start_vars: - type = start_vars['type'] - - return_count = 500 - if 'return_count' in start_vars: - assert(start_vars['return_count'].isdigit()) - return_count = int(start_vars['return_count']) - - corr_params = { - 'method' : method, - 'type' : type, - 'return_count' : return_count - } - +from __future__ import absolute_import, division, print_function + +import collections + +import scipy + +from MySQLdb import escape_string as escape + +from flask import g + +from base import data_set +from base.trait import create_trait, retrieve_sample_data + +from wqflask.correlation.show_corr_results import generate_corr_json +from wqflask.correlation import correlation_functions + +from utility import webqtlUtil, helper_functions, corr_result_helpers +from utility.benchmark import Bench + +import utility.logger +logger = utility.logger.getLogger(__name__ ) + +def do_correlation(start_vars): + assert('db' in start_vars) + assert('target_db' in start_vars) + assert('trait_id' in start_vars) + + this_dataset = data_set.create_dataset(dataset_name = start_vars['db']) + target_dataset = data_set.create_dataset(dataset_name = start_vars['target_db']) + this_trait = create_trait(dataset = this_dataset, name = start_vars['trait_id']) + this_trait = retrieve_sample_data(this_trait, this_dataset) + + corr_params = init_corr_params(start_vars) + + corr_results = calculate_results(this_trait, this_dataset, target_dataset, corr_params) + #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) + + final_results = [] + for _trait_counter, trait in enumerate(corr_results.keys()[:corr_params['return_count']]): + if corr_params['type'] == "tissue": + [sample_r, num_overlap, sample_p, symbol] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "#_strains" : num_overlap, + "p_value" : sample_p, + "symbol" 
: symbol + } + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": + [gene_id, sample_r] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "gene_id" : gene_id + } + else: + [sample_r, sample_p, num_overlap] = corr_results[trait] + result_dict = { + "trait" : trait, + "sample_r" : sample_r, + "#_strains" : num_overlap, + "p_value" : sample_p + } + + final_results.append(result_dict) + + # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True) + + return final_results + +def calculate_results(this_trait, this_dataset, target_dataset, corr_params): + corr_results = {} + + target_dataset.get_trait_data() + + if corr_params['type'] == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + corr_results = do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params) + sorted_results = collections.OrderedDict(sorted(corr_results.items(), + key=lambda t: -abs(t[1][1]))) + elif corr_params['type'] == "literature" or corr_params['type'] == "lit": #ZS: Just so a user can use either "lit" or "literature" + trait_geneid_dict = this_dataset.retrieve_genes("GeneId") + corr_results = do_literature_correlation_for_all_traits(this_trait, this_dataset, trait_geneid_dict, corr_params) + sorted_results = collections.OrderedDict(sorted(corr_results.items(), + key=lambda t: -abs(t[1][1]))) + else: + for target_trait, target_vals in target_dataset.trait_data.iteritems(): + result = get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, corr_params['type']) + if result is not None: + corr_results[target_trait] = result + + sorted_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0]))) + + return sorted_results + +def do_tissue_correlation_for_all_traits(this_trait, trait_symbol_dict, corr_params, tissue_dataset_id=1): + #Gets tissue expression values for the primary trait + 
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list = [this_trait.symbol]) + + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower()] + + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(symbol_list=trait_symbol_dict.values()) + + tissue_corr_data = {} + for trait, symbol in trait_symbol_dict.iteritems(): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + + result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, + this_trait_tissue_values, + corr_params['method']) + + tissue_corr_data[trait] = [result[0], result[1], result[2], symbol] + + return tissue_corr_data + +def do_literature_correlation_for_all_traits(this_trait, target_dataset, trait_geneid_dict, corr_params): + input_trait_mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), this_trait.geneid) + + lit_corr_data = {} + for trait, gene_id in trait_geneid_dict.iteritems(): + mouse_gene_id = convert_to_mouse_gene_id(target_dataset.group.species.lower(), gene_id) + + if mouse_gene_id and str(mouse_gene_id).find(";") == -1: + result = g.db.execute( + """SELECT value + FROM LCorrRamin3 + WHERE GeneId1='%s' and + GeneId2='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if not result: + result = g.db.execute("""SELECT value + FROM LCorrRamin3 + WHERE GeneId2='%s' and + GeneId1='%s' + """ % (escape(mouse_gene_id), escape(input_trait_mouse_gene_id)) + ).fetchone() + if result: + lit_corr = result.value + lit_corr_data[trait] = [gene_id, lit_corr] + else: + lit_corr_data[trait] = [gene_id, 0] + else: + lit_corr_data[trait] = [gene_id, 0] + + return lit_corr_data + +def get_sample_r_and_p_values(this_trait, this_dataset, target_vals, target_dataset, 
type): + """ + Calculates the sample r (or rho) and p-value + + Given a primary trait and a target trait's sample values, + calculates either the pearson r or spearman rho and the p-value + using the corresponding scipy functions. + """ + + this_trait_vals = [] + shared_target_vals = [] + for i, sample in enumerate(target_dataset.group.samplelist): + if sample in this_trait.data: + this_sample_value = this_trait.data[sample].value + target_sample_value = target_vals[i] + this_trait_vals.append(this_sample_value) + shared_target_vals.append(target_sample_value) + + this_trait_vals, shared_target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, shared_target_vals) + + if type == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, shared_target_vals) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_vals, shared_target_vals) + + if num_overlap > 5: + if scipy.isnan(sample_r): + return None + else: + return [sample_r, sample_p, num_overlap] + +def convert_to_mouse_gene_id(species=None, gene_id=None): + """If the species is rat or human, translate the gene_id to the mouse geneid + + If there is no input gene_id or there's no corresponding mouse gene_id, return None + + """ + if not gene_id: + return None + + mouse_gene_id = None + + if species == 'mouse': + mouse_gene_id = gene_id + + elif species == 'rat': + + query = """SELECT mouse + FROM GeneIDXRef + WHERE rat='%s'""" % escape(gene_id) + + result = g.db.execute(query).fetchone() + if result != None: + mouse_gene_id = result.mouse + + elif species == 'human': + + query = """SELECT mouse + FROM GeneIDXRef + WHERE human='%s'""" % escape(gene_id) + + result = g.db.execute(query).fetchone() + if result != None: + mouse_gene_id = result.mouse + + return mouse_gene_id + +def init_corr_params(start_vars): + method = "pearson" + if 'method' in start_vars: + method = start_vars['method'] + + type = "sample" + if 'type' in start_vars: + type = start_vars['type'] + + 
return_count = 500 + if 'return_count' in start_vars: + assert(start_vars['return_count'].isdigit()) + return_count = int(start_vars['return_count']) + + corr_params = { + 'method' : method, + 'type' : type, + 'return_count' : return_count + } + return corr_params \ No newline at end of file diff --git a/wqflask/wqflask/api/gen_menu.py b/wqflask/wqflask/api/gen_menu.py index c7bcb65d..bdcc3bf7 100644 --- a/wqflask/wqflask/api/gen_menu.py +++ b/wqflask/wqflask/api/gen_menu.py @@ -126,9 +126,7 @@ def build_types(species, group): InbredSet.Name = '{1}' AND ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND - ProbeSetFreeze.public > 0 AND - ProbeSetFreeze.confidentiality < 1 + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY Tissue.Name""".format(species, group) results = [] @@ -194,9 +192,7 @@ def build_datasets(species, group, type_name): FROM InfoFiles, GenoFreeze, InbredSet WHERE InbredSet.Name = '{}' AND GenoFreeze.InbredSetId = InbredSet.Id AND - InfoFiles.InfoPageName = GenoFreeze.ShortName AND - GenoFreeze.public > 0 AND - GenoFreeze.confidentiality < 1 + InfoFiles.InfoPageName = GenoFreeze.ShortName ORDER BY GenoFreeze.CreateTime DESC""".format(group)).fetchone() if results != None: @@ -214,8 +210,7 @@ def build_datasets(species, group, type_name): Species.Id = InbredSet.SpeciesId AND InbredSet.Name = '{1}' AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '{2}' AND - ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id AND - ProbeSetFreeze.confidentiality < 1 and ProbeSetFreeze.public > 0 + ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id ORDER BY ProbeSetFreeze.CreateTime DESC""".format(species, group, type_name)).fetchall() datasets = [] diff --git a/wqflask/wqflask/api/mapping.py b/wqflask/wqflask/api/mapping.py index d830cefc..92c27c9b 100644 --- a/wqflask/wqflask/api/mapping.py +++ 
b/wqflask/wqflask/api/mapping.py @@ -4,7 +4,7 @@ import string from base import data_set from base import webqtlConfig -from base.trait import GeneralTrait, retrieve_sample_data +from base.trait import create_trait, retrieve_sample_data from utility import helper_functions from wqflask.marker_regression import gemma_mapping, rqtl_mapping, qtlreaper_mapping, plink_mapping @@ -18,7 +18,7 @@ def do_mapping_for_api(start_vars): dataset = data_set.create_dataset(dataset_name = start_vars['db']) dataset.group.get_markers() - this_trait = GeneralTrait(dataset = dataset, name = start_vars['trait_id']) + this_trait = create_trait(dataset = dataset, name = start_vars['trait_id']) this_trait = retrieve_sample_data(this_trait, dataset) samples = [] diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index b22e0004..4fb8e69b 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -14,9 +14,6 @@ import urlparse import simplejson as json -import redis -Redis = redis.StrictRedis() - from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash, jsonify) @@ -30,8 +27,10 @@ from wqflask import model from utility import Bunch, Struct, hmac from utility.formatting import numify +from utility.redis_tools import get_redis_conn +Redis = get_redis_conn() -from base import trait +from base.trait import create_trait, retrieve_trait_info, jsonable from base.data_set import create_dataset import logging @@ -208,14 +207,14 @@ def view_collection(): if dataset_name == "Temp": group = name.split("_")[2] dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group) - trait_ob = trait.GeneralTrait(name=name, dataset=dataset) + trait_ob = create_trait(name=name, dataset=dataset) else: dataset = create_dataset(dataset_name) - trait_ob = trait.GeneralTrait(name=name, dataset=dataset) - trait_ob = trait.retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) + trait_ob = create_trait(name=name, dataset=dataset) 
+ trait_ob = retrieve_trait_info(trait_ob, dataset, get_qtl_info=True) trait_obs.append(trait_ob) - json_version.append(trait.jsonable(trait_ob)) + json_version.append(jsonable(trait_ob)) collection_info = dict(trait_obs=trait_obs, uc = uc) diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py index 21eb1493..5d74dc9d 100644 --- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py +++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py @@ -37,7 +37,7 @@ from pprint import pformat as pf import reaper -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers from db import webqtlDatabaseFunction @@ -108,7 +108,7 @@ class ComparisonBarChart(object): trait_name, dataset_name = trait_db.split(":") #print("dataset_name:", dataset_name) dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) self.trait_list.append((trait_ob, dataset_ob)) diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index dfb81c54..04ec427d 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -4,7 +4,7 @@ import math from flask import g -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import corr_result_helpers from scipy import stats @@ -20,9 +20,9 @@ class CorrScatterPlot(object): self.data_set_1 = data_set.create_dataset(params['dataset_1']) self.data_set_2 = data_set.create_dataset(params['dataset_2']) #self.data_set_3 = data_set.create_dataset(params['dataset_3']) - self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1) - self.trait_2 = 
GeneralTrait(name=params['trait_2'], dataset=self.data_set_2) - #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3) + self.trait_1 = create_trait(name=params['trait_1'], dataset=self.data_set_1) + self.trait_2 = create_trait(name=params['trait_2'], dataset=self.data_set_2) + #self.trait_3 = create_trait(name=params['trait_3'], dataset=self.data_set_3) samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index b099b83d..7eab7184 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -47,7 +47,7 @@ import reaper from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers, hmac from db import webqtlDatabaseFunction @@ -97,7 +97,7 @@ class CorrelationResults(object): if start_vars['dataset'] == "Temp": self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) self.trait_id = start_vars['trait_id'] - self.this_trait = GeneralTrait(dataset=self.dataset, + self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) else: @@ -199,7 +199,9 @@ class CorrelationResults(object): range_chr_as_int = order_id for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]): - trait_object = GeneralTrait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + if not trait_object: + continue if self.target_dataset.type == "ProbeSet" or 
self.target_dataset.type == "Geno": #ZS: Convert trait chromosome to an int for the location range option diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index b5c45d05..2b9467d1 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -43,14 +43,16 @@ from pprint import pformat as pf import reaper -import redis -Redis = redis.StrictRedis() +from utility.redis_tools import get_redis_conn +Redis = get_redis_conn() +THIRTY_DAYS = 60 * 60 * 24 * 30 from utility.THCell import THCell from utility.TDCell import TDCell from base.trait import GeneralTrait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers + from db import webqtlDatabaseFunction import utility.webqtlUtil #this is for parallel computing only. from wqflask.correlation import correlation_functions @@ -204,20 +206,6 @@ class CorrelationMatrix(object): samples = self.all_sample_list, sample_data = self.sample_data,) # corr_results = [result[1] for result in result_row for result_row in self.corr_results]) - - def get_trait_db_obs(self, trait_db_list): - - self.trait_list = [] - for i, trait_db in enumerate(trait_db_list): - if i == (len(trait_db_list) - 1): - break - trait_name, dataset_name = trait_db.split(":") - #print("dataset_name:", dataset_name) - dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, - name=trait_name, - cellid=None) - self.trait_list.append((trait_ob, dataset_ob)) def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') @@ -257,7 +245,7 @@ class CorrelationMatrix(object): this_vals_string += "x " this_vals_string = this_vals_string[:-1] - Redis.set(trait_id, this_vals_string) + Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS) self.pca_trait_ids.append(trait_id) return pca diff --git 
a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py index 4415b86a..35067036 100644 --- a/wqflask/wqflask/ctl/ctl_analysis.py +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -17,7 +17,7 @@ import csv import itertools from base import data_set -from base import trait as TRAIT +from base.trait import create_trait, retrieve_sample_data from utility import helper_functions from utility.tools import locate, GN2_BRANCH_URL @@ -122,8 +122,8 @@ class CTL(object): logger.debug("retrieving data for", trait) if trait != "": ts = trait.split(':') - gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1]) - gt = TRAIT.retrieve_sample_data(gt, dataset, individuals) + gt = create_trait(name = ts[0], dataset_name = ts[1]) + gt = retrieve_sample_data(gt, dataset, individuals) for ind in individuals: if ind in gt.data.keys(): traits.append(gt.data[ind].value) @@ -180,8 +180,8 @@ class CTL(object): logger.debug(significant[0][x], significant[1][x], significant[2][x]) # Debug to console tsS = significant[0][x].split(':') # Source tsT = significant[2][x].split(':') # Target - gtS = TRAIT.GeneralTrait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB - gtT = TRAIT.GeneralTrait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB + gtS = create_trait(name = tsS[0], dataset_name = tsS[1]) # Retrieve Source info from the DB + gtT = create_trait(name = tsT[0], dataset_name = tsT[1]) # Retrieve Target info from the DB self.addNode(gtS) self.addNode(gtT) self.addEdge(gtS, gtT, significant, x) diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index b0ca5ced..1e15d28f 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -34,10 +34,7 @@ class DoSearch(object): self.search_type = search_type if self.dataset: - logger.debug("self.dataset is boo: ", type(self.dataset), pf(self.dataset)) - logger.debug("self.dataset.group is: ", pf(self.dataset.group)) #Get group information for 
dataset and the species id - self.species_id = webqtlDatabaseFunction.retrieve_species_id(self.dataset.group.name) def execute(self, query): @@ -54,10 +51,6 @@ class DoSearch(object): return keyword - #def escape(self, stringy): - # """Shorter name than self.db_conn.escape_string""" - # return escape(str(stringy)) - def mescape(self, *items): """Multiple escape""" escaped = [escape(str(item)) for item in items] @@ -71,8 +64,6 @@ class DoSearch(object): @classmethod def get_search(cls, search_type): - logger.debug("search_types are:", pf(cls.search_types)) - search_type_string = search_type['dataset_type'] if 'key' in search_type and search_type['key'] != None: search_type_string += '_' + search_type['key'] @@ -648,7 +639,7 @@ class CisTransLrsSearch(DoSearch): escape(self.dataset.type), chromosome) else: - location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type)) + location_clause = "(ABS(%s.Mb-Geno.Mb) %s %s and %s.Chr = Geno.Chr) or (%s.Chr != Geno.Chr)" % (escape(self.dataset.type), the_operator, escape(str(self.mb_buffer)), escape(self.dataset.type), escape(self.dataset.type)) where_clause = sub_clause + """ %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 04e3d578..c65a1415 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -4,7 +4,7 @@ import json from flask import Flask, g from base.data_set import create_dataset -from base.trait import GeneralTrait +from base.trait import create_trait from db import webqtlDatabaseFunction from base import webqtlConfig @@ -96,7 +96,9 @@ class GSearch(object): #dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False) #trait_id = line[4] #with Bench("Building trait object"): - trait_ob = GeneralTrait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, 
get_sample_info=False) + trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + if not trait_ob: + continue max_lrs_text = "N/A" if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": max_lrs_text = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) @@ -210,13 +212,12 @@ class GSearch(object): if line[11] != "" and line[11] != None: this_trait['additive'] = '%.3f' % line[11] - #dataset = create_dataset(line[2], "Publish") - #trait_id = line[3] - #this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) this_trait['max_lrs_text'] = "N/A" + trait_ob = create_trait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) + if not trait_ob: + continue if this_trait['dataset'] == this_trait['group'] + "Publish": try: - trait_ob = GeneralTrait(dataset_name=this_trait['dataset'], name=this_trait['name'], get_qtl_info=True, get_sample_info=False) if trait_ob.locus_chr != "" and trait_ob.locus_mb != "": this_trait['max_lrs_text'] = "Chr" + str(trait_ob.locus_chr) + ": " + str(trait_ob.locus_mb) except: diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index a648667b..74fa4329 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -246,6 +246,12 @@ class DisplayMappingResults(object): if 'output_files' in start_vars: self.output_files = ",".join(start_vars['output_files']) + self.categorical_vars = "" + self.perm_strata = "" + if 'perm_strata' in start_vars.keys() and 'categorical_vars' in start_vars.keys(): + self.categorical_vars = start_vars['categorical_vars'] + self.perm_strata = start_vars['perm_strata'] + self.selectedChr = int(start_vars['selected_chr']) self.strainlist = start_vars['samples'] diff --git 
a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index e2b15c26..88d27517 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -1,7 +1,7 @@ import os, math, string, random, json from base import webqtlConfig -from base.trait import GeneralTrait +from base.trait import create_trait from base.data_set import create_dataset from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE @@ -129,7 +129,7 @@ def gen_covariates_file(this_dataset, covariates, samples): this_covariate_data = [] trait_name = covariate.split(":")[0] dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) diff --git a/wqflask/wqflask/marker_regression/rqtl_mapping.py b/wqflask/wqflask/marker_regression/rqtl_mapping.py index e4a4d127..c5590a85 100644 --- a/wqflask/wqflask/marker_regression/rqtl_mapping.py +++ b/wqflask/wqflask/marker_regression/rqtl_mapping.py @@ -6,7 +6,7 @@ import json from flask import g from base.webqtlConfig import TMPDIR -from base.trait import GeneralTrait +from base.trait import create_trait from base.data_set import create_dataset from utility import webqtlUtil from utility.tools import locate, TEMPDIR @@ -86,7 +86,6 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec cross_object = add_phenotype(cross_object, pheno_string, "the_pheno") # Add the phenotype cross_object = add_names(cross_object, names_string, "the_names") # Add the phenotype logger.info("Added pheno and names"); - # Scan for QTLs marker_covars = create_marker_covariates(control_marker, cross_object) # Create the additive covariate markers logger.info("Marker covars done"); if cofactors != "": @@ -115,6 +114,7 @@ def run_rqtl_geno(vals, samples, dataset, mapping_scale, method, model, permChec 
else: if do_control == "true" or cofactors != "": logger.info("Using covariate"); result_data_frame = scanone(cross_object, pheno = "the_pheno", addcovar = covars, model=model, method=method) + ro.r('save.image(file = "/home/zas1024/gn2-zach/itp_cofactor_test.RData")') else: logger.info("No covariates"); result_data_frame = scanone(cross_object, pheno = "the_pheno", model=model, method=method) @@ -295,7 +295,7 @@ def add_cofactors(cross, this_dataset, covariates, samples): covar_as_string = "c(" trait_name = covariate.split(":")[0] dataset_ob = create_dataset(covariate.split(":")[1]) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) @@ -321,27 +321,27 @@ def add_cofactors(cross, this_dataset, covariates, samples): datatype = get_trait_data_type(covariate) logger.info("Covariate: " + covariate + " is of type: " + datatype); if(datatype == "categorical"): # Cat variable - logger.info("call of add_categorical_covar"); - cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross - logger.info("add_categorical_covar returned"); - for z, col_name in enumerate(col_names): # Go through the additional covar names + logger.info("call of add_categorical_covar"); + cross, col_names = add_categorical_covar(cross, covar_as_string, i) # Expand and add it to the cross + logger.info("add_categorical_covar returned"); + for z, col_name in enumerate(col_names): # Go through the additional covar names + if i < (len(covariate_list) - 1): + covar_name_string += '"' + col_name + '", ' + else: + if(z < (len(col_names) -1)): + covar_name_string += '"' + col_name + '", ' + else: + covar_name_string += '"' + col_name + '"' + + logger.info("covar_name_string:" + covar_name_string) + else: + col_name = "covar_" + str(i) + cross = add_phenotype(cross, covar_as_string, col_name) if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - if(z < 
(len(col_names) -1)): covar_name_string += '"' + col_name + '", ' - else: + else: covar_name_string += '"' + col_name + '"' - logger.info("covar_name_string:" + covar_name_string); - else: - col_name = "covar_" + str(i) - cross = add_phenotype(cross, covar_as_string, col_name) - if i < (len(covariate_list) - 1): - covar_name_string += '"' + col_name + '", ' - else: - covar_name_string += '"' + col_name + '"' - covar_name_string += ")" logger.info("covar_name_string:" + covar_name_string); covars_ob = pull_var("trait_covars", cross, covar_name_string) @@ -350,9 +350,13 @@ def add_cofactors(cross, this_dataset, covariates, samples): def create_marker_covariates(control_marker, cross): ro.globalenv["the_cross"] = cross ro.r('genotypes <- pull.geno(the_cross)') # Get the genotype matrix - userinputS = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user - covariate_names = ', '.join('"{0}"'.format(w) for w in userinputS) - ro.r('covnames <- c(' + covariate_names + ')') + userinput_sanitized = control_marker.replace(" ", "").split(",") # TODO: sanitize user input, Never Ever trust a user + logger.debug(userinput_sanitized) + if len(userinput_sanitized) > 0: + covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized) + ro.r('covnames <- c(' + covariate_names + ')') + else: + ro.r('covnames <- c()') ro.r('covInGeno <- which(covnames %in% colnames(genotypes))') ro.r('covnames <- covnames[covInGeno]') ro.r("cat('covnames (purged): ', covnames,'\n')") @@ -404,16 +408,4 @@ def process_rqtl_results(result, species_name): # TODO: how to make this marker['lod_score'] = output[i][2] qtl_results.append(marker) - return qtl_results - -def get_trait_data_type(trait_db_string): - # Get a trait's type (numeric, categorical, etc) from the DB - the_query = "SELECT value FROM TraitMetadata WHERE type='trait_data_type'" - results_json = g.db.execute(the_query).fetchone() - - results_ob = json.loads(results_json[0]) - - if 
trait_db_string in results_ob: - return results_ob[trait_db_string] - else: - return "numeric" + return qtl_results \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/run_mapping.py b/wqflask/wqflask/marker_regression/run_mapping.py index 5f7710ab..0711b852 100644 --- a/wqflask/wqflask/marker_regression/run_mapping.py +++ b/wqflask/wqflask/marker_regression/run_mapping.py @@ -161,7 +161,7 @@ class RunMapping(object): self.num_perm = 0 self.perm_output = [] self.bootstrap_results = [] - self.covariates = start_vars['covariates'] if "covariates" in start_vars else None + self.covariates = start_vars['covariates'] if "covariates" in start_vars else "" #ZS: This is passed to GN1 code for single chr mapping self.selected_chr = -1 @@ -467,6 +467,7 @@ class RunMapping(object): #mapping_scale = self.mapping_scale, #chromosomes = chromosome_mb_lengths, #qtl_results = self.qtl_results, + categorical_vars = self.categorical_vars, chr_lengths = chr_lengths, num_perm = self.num_perm, perm_results = self.perm_output, diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py index 152e4168..f41f3017 100644 --- a/wqflask/wqflask/network_graph/network_graph.py +++ b/wqflask/wqflask/network_graph/network_graph.py @@ -44,7 +44,7 @@ import reaper from utility.THCell import THCell from utility.TDCell import TDCell -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from utility import webqtlUtil, helper_functions, corr_result_helpers from utility.tools import GN2_BRANCH_URL @@ -217,7 +217,7 @@ class NetworkGraph(object): break trait_name, dataset_name = trait_db.split(":") dataset_ob = data_set.create_dataset(dataset_name) - trait_ob = GeneralTrait(dataset=dataset_ob, + trait_ob = create_trait(dataset=dataset_ob, name=trait_name, cellid=None) self.trait_list.append((trait_ob, dataset_ob)) \ No newline at end of file diff --git a/wqflask/wqflask/search_results.py 
b/wqflask/wqflask/search_results.py index 8f702d58..de4b01eb 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -1,15 +1,9 @@ -# from __future__ import absolute_import, print_function, division +from __future__ import absolute_import, print_function, division - -import os -import cPickle import re import uuid from math import * import time -import math -import datetime -import collections import re import requests @@ -18,18 +12,16 @@ from pprint import pformat as pf import json from base.data_set import create_dataset -from base import trait +from base.trait import create_trait from wqflask import parser from wqflask import do_search -from utility import webqtlUtil,tools from db import webqtlDatabaseFunction -from flask import render_template, Flask, g +from flask import Flask, g -from utility import formatting -from utility import hmac +from utility import hmac, helper_functions from utility.tools import GN2_BASE_URL -from utility.type_checking import is_float, is_int, is_str, get_float, get_int, get_string +from utility.type_checking import is_str from utility.logger import getLogger logger = getLogger(__name__ ) @@ -86,7 +78,7 @@ views.py). try: self.search() except: - self.search_term_exists = False + self.search_term_exists = False if self.search_term_exists: self.gen_search_result() @@ -113,50 +105,49 @@ views.py). 
trait_dict = {} trait_id = result[0] - trait_dict['index'] = index + 1 - this_trait = trait.GeneralTrait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) - trait_dict['name'] = this_trait.name - if this_trait.dataset.type == "Publish": - trait_dict['display_name'] = this_trait.display_name - else: - trait_dict['display_name'] = this_trait.name - trait_dict['dataset'] = this_trait.dataset.name - trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name)) - if this_trait.dataset.type == "ProbeSet": - trait_dict['symbol'] = this_trait.symbol - trait_dict['description'] = this_trait.description_display.decode('utf-8', 'replace') - trait_dict['location'] = this_trait.location_repr - trait_dict['mean'] = "N/A" - trait_dict['additive'] = "N/A" - if this_trait.mean != "" and this_trait.mean != None: - trait_dict['mean'] = '%.3f' % this_trait.mean - trait_dict['lrs_score'] = this_trait.LRS_score_repr - trait_dict['lrs_location'] = this_trait.LRS_location_repr - if this_trait.additive != "": - trait_dict['additive'] = '%.3f' % this_trait.additive - elif this_trait.dataset.type == "Geno": - trait_dict['location'] = this_trait.location_repr - elif this_trait.dataset.type == "Publish": - trait_dict['description'] = this_trait.description_display - trait_dict['authors'] = this_trait.authors - trait_dict['pubmed_id'] = "N/A" - if this_trait.pubmed_id: - trait_dict['pubmed_id'] = this_trait.pubmed_id - trait_dict['pubmed_link'] = this_trait.pubmed_link - trait_dict['pubmed_text'] = this_trait.pubmed_text - trait_dict['mean'] = "N/A" - if this_trait.mean != "" and this_trait.mean != None: - trait_dict['mean'] = '%.3f' % this_trait.mean - trait_dict['lrs_score'] = this_trait.LRS_score_repr - trait_dict['lrs_location'] = this_trait.LRS_location_repr - trait_dict['additive'] = "N/A" - if this_trait.additive != "": - trait_dict['additive'] = '%.3f' % this_trait.additive - trait_list.append(trait_dict) - 
#json_trait_list.append(trait.jsonable_table_row(this_trait, self.dataset.name, index + 1)) + this_trait = create_trait(dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + if this_trait: + trait_dict['index'] = index + 1 + trait_dict['name'] = this_trait.name + if this_trait.dataset.type == "Publish": + trait_dict['display_name'] = this_trait.display_name + else: + trait_dict['display_name'] = this_trait.name + trait_dict['dataset'] = this_trait.dataset.name + trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name)) + if this_trait.dataset.type == "ProbeSet": + trait_dict['symbol'] = this_trait.symbol + trait_dict['description'] = this_trait.description_display.decode('utf-8', 'replace') + trait_dict['location'] = this_trait.location_repr + trait_dict['mean'] = "N/A" + trait_dict['additive'] = "N/A" + if this_trait.mean != "" and this_trait.mean != None: + trait_dict['mean'] = '%.3f' % this_trait.mean + trait_dict['lrs_score'] = this_trait.LRS_score_repr + trait_dict['lrs_location'] = this_trait.LRS_location_repr + if this_trait.additive != "": + trait_dict['additive'] = '%.3f' % this_trait.additive + elif this_trait.dataset.type == "Geno": + trait_dict['location'] = this_trait.location_repr + elif this_trait.dataset.type == "Publish": + trait_dict['description'] = this_trait.description_display + trait_dict['authors'] = this_trait.authors + trait_dict['pubmed_id'] = "N/A" + if this_trait.pubmed_id: + trait_dict['pubmed_id'] = this_trait.pubmed_id + trait_dict['pubmed_link'] = this_trait.pubmed_link + trait_dict['pubmed_text'] = this_trait.pubmed_text + trait_dict['mean'] = "N/A" + if this_trait.mean != "" and this_trait.mean != None: + trait_dict['mean'] = '%.3f' % this_trait.mean + trait_dict['lrs_score'] = this_trait.LRS_score_repr + trait_dict['lrs_location'] = this_trait.LRS_location_repr + trait_dict['additive'] = "N/A" + if this_trait.additive != "": + trait_dict['additive'] = '%.3f' % 
this_trait.additive + trait_list.append(trait_dict) self.trait_list = json.dumps(trait_list) - #self.json_trait_list = json.dumps(json_trait_list) def search(self): """ @@ -234,7 +225,6 @@ views.py). self.header_fields = the_search.header_fields def get_search_ob(self, a_search): - logger.debug("[kodak] item is:", pf(a_search)) search_term = a_search['search_term'] search_operator = a_search['separator'] search_type = {} @@ -243,12 +233,10 @@ views.py). search_type['key'] = a_search['key'].upper() else: search_type['key'] = None - logger.debug("search_type is:", pf(search_type)) search_ob = do_search.DoSearch.get_search(search_type) if search_ob: search_class = getattr(do_search, search_ob) - logger.debug("search_class is: ", pf(search_class)) the_search = search_class(search_term, search_operator, self.dataset, diff --git a/wqflask/wqflask/show_trait/export_trait_data.py b/wqflask/wqflask/show_trait/export_trait_data.py index 107f87c6..253c887b 100644 --- a/wqflask/wqflask/show_trait/export_trait_data.py +++ b/wqflask/wqflask/show_trait/export_trait_data.py @@ -4,7 +4,7 @@ import simplejson as json from pprint import pformat as pf -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set def export_sample_table(targs): @@ -26,7 +26,7 @@ def export_sample_table(targs): def get_export_metadata(trait_id, dataset_name): dataset = data_set.create_dataset(dataset_name) - this_trait = GeneralTrait(dataset=dataset, + this_trait = create_trait(dataset=dataset, name=trait_id, cellid=None, get_qtl_info=False) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 29b2f77e..c77e247f 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -10,9 +10,6 @@ import json as json from collections import OrderedDict -import redis -Redis = redis.StrictRedis() - import numpy as np import scipy.stats as ss @@ -21,11 +18,15 @@ from flask import Flask, g 
from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList -from utility import webqtlUtil, Plot, Bunch, helper_functions -from utility.tools import locate_ignore_error -from base.trait import GeneralTrait +from base.trait import create_trait from base import data_set from db import webqtlDatabaseFunction +from utility import webqtlUtil, Plot, Bunch, helper_functions +from utility.authentication_tools import check_owner +from utility.tools import locate_ignore_error +from utility.redis_tools import get_redis_conn, get_resource_id +Redis = get_redis_conn() +ONE_YEAR = 60 * 60 * 24 * 365 from pprint import pformat as pf @@ -55,9 +56,9 @@ class ShowTrait(object): self.temp_group = kw['group'] self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group) # Put values in Redis so they can be looked up later if added to a collection - Redis.set(self.trait_id, kw['trait_paste']) + Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR) self.trait_vals = kw['trait_paste'].split() - self.this_trait = GeneralTrait(dataset=self.dataset, + self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) else: @@ -66,11 +67,13 @@ class ShowTrait(object): self.temp_species = self.trait_id.split("_")[1] self.temp_group = self.trait_id.split("_")[2] self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group) - self.this_trait = GeneralTrait(dataset=self.dataset, + self.this_trait = create_trait(dataset=self.dataset, name=self.trait_id, cellid=None) self.trait_vals = Redis.get(self.trait_id).split() + self.resource_id = check_owner(self.dataset, self.trait_id) + #ZS: Get verify/rna-seq link URLs try: blatsequence = self.this_trait.sequence diff --git a/wqflask/wqflask/templates/admin/group_manager.html b/wqflask/wqflask/templates/admin/group_manager.html index ac5c1350..b7df1aad 100644 --- 
a/wqflask/wqflask/templates/admin/group_manager.html +++ b/wqflask/wqflask/templates/admin/group_manager.html @@ -2,17 +2,25 @@ {% block title %}Group Manager{% endblock %} {% block content %} <!-- Start of body --> - {{ header("List of groups", "" )}} - <div class="container"> <div class="page-header"> <h1>Manage Groups</h1> + <button type="button" id="remove_groups" class="btn btn-primary" data-url="/groups/remove">Remove Selected Groups</button> </div> - <form action="/manage/groups" method="POST"> + <form id="groups_form" action="/groups/manage" method="POST"> + <input type="hidden" name="selected_group_ids" value=""> <div class="container" style="margin-bottom: 30px;"> + {% if admin_groups|length == 0 and user_groups|length == 0 %} + <h4>You currently aren't a member or admin of any groups.</h4> + <br> + <button type="submit" name="add_new_group" class="btn btn-primary">Create a new group</button> + {% else %} <div><h3>Admin Groups</h3></div> <hr> - <table id="admin_groups" class="table table-hover"> + {% if admin_groups|length == 0 %} + <h4>You currently aren't the administrator of any groups.</h4> + {% else %} + <table id="admin_groups" class="table table-hover" style="min-width: 800px; max-width: 1000px;"> <thead> <tr> <th></th> @@ -26,7 +34,7 @@ <tbody> {% for group in admin_groups %} <tr> - <td><input type="checkbox" name="read" value="{{ group.id }}"></td> + <td><input type="checkbox" name="group_id" value="{{ group.id }}"></td> <td>{{ loop.index }}</td> <td>{{ group.name }}</td> <td>{{ group.admins|length + group.users|length }}</td> @@ -36,12 +44,16 @@ {% endfor %} </tbody> </table> + {% endif %} </div> <hr> <div class="container"> <div><h3>User Groups</h3></div> <hr> - <table id="user_groups" class="table table-hover"> + {% if user_groups|length == 0 %} + <h4>You currently aren't a member of any groups.</h4> + {% else %} + <table id="user_groups" class="table table-hover" style="min-width: 800px; max-width: 1000px;"> <thead> <tr> <th></th> @@ -65,12 
+77,12 @@ {% endfor %} </tbody> </table> + {% endif %} + {% endif %} </div> </form> </div> - - <!-- End of body --> {% endblock %} @@ -79,7 +91,6 @@ <script language="javascript" type="text/javascript" src="/static/new/packages/DataTables/js/jquery.js"></script> <script language="javascript" type="text/javascript" src="{{ url_for('js', filename='DataTables/js/jquery.dataTables.min.js') }}"></script> - <script language="javascript" type="text/javascript" src="/static/packages/TableTools/media/js/TableTools.min.js"></script> <script language="javascript" type="text/javascript" src="/static/packages/underscore/underscore-min.js"></script> <script type="text/javascript" charset="utf-8"> @@ -113,6 +124,22 @@ "paging": false, "orderClasses": true } ); + + submit_special = function(url) { + $("#groups_form").attr("action", url); + return $("#groups_form").submit(); + }; + + $("#remove_groups").on("click", function() { + url = $(this).data("url") + groups = [] + $("input[name=group_id]:checked").each(function() { + groups.push($(this).val()); + }); + groups_string = groups.join(":") + $("input[name=selected_group_ids]").val(groups_string) + return submit_special(url) + }); }); </script> {% endblock %} diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index 1c84239c..71705390 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -1,4 +1,5 @@ {% extends "base.html" %} +{% block title %}Correlation Results{% endblock %} {% block css %} <link rel="stylesheet" type="text/css" href="{{ url_for('css', filename='DataTables/css/jquery.dataTables.css') }}" /> <link rel="stylesheet" type="text/css" href="/static/new/packages/DataTables/extensions/buttons.bootstrap.css" /> diff --git a/wqflask/wqflask/templates/email/verification.txt b/wqflask/wqflask/templates/email/verification.txt deleted file mode 100644 index 76149a3a..00000000 --- 
a/wqflask/wqflask/templates/email/verification.txt +++ /dev/null @@ -1,7 +0,0 @@ -Thank you for signing up for GeneNetwork. - -We need to verify your email address. - -To do that please click the following link, or cut and paste it into your browser window: - -{{ url_for_hmac("verify_email", code = verification_code, _external=True )}} diff --git a/wqflask/wqflask/templates/gsearch_pheno.html b/wqflask/wqflask/templates/gsearch_pheno.html index 05b2f988..04b45659 100644 --- a/wqflask/wqflask/templates/gsearch_pheno.html +++ b/wqflask/wqflask/templates/gsearch_pheno.html @@ -31,7 +31,7 @@ </form> <br /> <br /> - <div style="width: 100%;"> + <div style="min-width: 2000px; width: 100%;"> <table id="trait_table" class="table-hover table-striped cell-border" style="float: left;"> <tbody> <td colspan="100%" align="center"><br><b><font size="15">Loading...</font></b><br></td> diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html index b4429b46..c5d49168 100644 --- a/wqflask/wqflask/templates/mapping_results.html +++ b/wqflask/wqflask/templates/mapping_results.html @@ -41,7 +41,8 @@ <input type="hidden" name="selected_chr" value="{{ selectedChr }}"> <input type="hidden" name="manhattan_plot" value="{{ manhattan_plot }}"> <input type="hidden" name="num_perm" value="{{ nperm }}"> - <input type="hidden" name="perm_results" value=""> + <input type="hidden" name="perm_info" value=""> + <input type="hidden" name="perm_strata" value="{{ perm_strata }}"> <input type="hidden" name="num_bootstrap" value="{{ nboot }}"> <input type="hidden" name="do_control" value="{{ doControl }}"> <input type="hidden" name="control_marker" value="{{ controlLocus }}"> @@ -464,13 +465,27 @@ {% if mapping_method != "gemma" and mapping_method != "plink" %} $('#download_perm').click(function(){ - var num_perm, perm_data; - num_perm = js_data.num_perm - perm_data = js_data.perm_results - json_perm_data = JSON.stringify(perm_data); - 
$('input[name=perm_results]').val(json_perm_data); - $('#marker_regression_form').attr('action', '/export_perm_data'); - return $('#marker_regression_form').submit(); + perm_info_dict = { + perm_data: js_data.perm_results, + num_perm: "{{ nperm }}", + trait_name: "{{ this_trait.display_name }}", + trait_description: "{{ this_trait.description_display }}", + cofactors: "{{ covariates }}", + n_samples: {{ n_samples }}, + n_genotypes: {{ qtl_results|length }}, + {% if genofile_string is defined %} + genofile: "{{ genofile_string }}", + {% else %} + genofile: "", + {% endif %} + units_linkage: "{{ LRS_LOD }}", + strat_cofactors: js_data.categorical_vars + } + json_perm_data = JSON.stringify(perm_info_dict); + + $('input[name=perm_info]').val(json_perm_data); + $('#marker_regression_form').attr('action', '/export_perm_data'); + return $('#marker_regression_form').submit(); }); modebar_options = { diff --git a/wqflask/wqflask/user_login.py b/wqflask/wqflask/user_login.py index edd272c2..cfee0079 100644 --- a/wqflask/wqflask/user_login.py +++ b/wqflask/wqflask/user_login.py @@ -12,9 +12,6 @@ import requests import simplejson as json -import redis # used for collections -Redis = redis.StrictRedis() - from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash, abort) @@ -23,7 +20,8 @@ from wqflask import pbkdf2 from wqflask.user_session import UserSession from utility import hmac -from utility.redis_tools import is_redis_available, get_user_id, get_user_by_unique_column, set_user_attribute, save_user, save_verification_code, check_verification_code, get_user_collections, save_collections +from utility.redis_tools import is_redis_available, get_redis_conn, get_user_id, get_user_by_unique_column, set_user_attribute, save_user, save_verification_code, check_verification_code, get_user_collections, save_collections +Redis = get_redis_conn() from utility.logger import getLogger logger = getLogger(__name__) @@ -127,7 +125,7 @@ def 
send_email(toaddr, msg, fromaddr="no-reply@genenetwork.org"): server.quit() logger.info("Successfully sent email to "+toaddr) -def send_verification_email(user_details, template_name = "email/verification.txt", key_prefix = "verification_code", subject = "GeneNetwork email verification"): +def send_verification_email(user_details, template_name = "email/user_verification.txt", key_prefix = "verification_code", subject = "GeneNetwork e-mail verification"): verification_code = str(uuid.uuid4()) key = key_prefix + ":" + verification_code @@ -141,6 +139,21 @@ def send_verification_email(user_details, template_name = "email/verification.tx send_email(recipient, subject, body) return {"recipient": recipient, "subject": subject, "body": body} +@app.route("/manage/verify_email") +def verify_email(): + if 'code' in request.args: + user_details = check_verification_code(request.args['code']) + if user_details: + # As long as they have access to the email account + # We might as well log them in + session_id_signed = get_signed_session_id(user_details) + flash("Thank you for logging in {}.".format(user_details['full_name']), "alert-success") + response = make_response(redirect(url_for('index_page', import_collections = import_col, anon_id = anon_id))) + response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) + return response + else: + flash("Invalid code: Password reset code does not exist or might have expired!", "error") + @app.route("/n/login", methods=('GET', 'POST')) def login(): params = request.form if request.form else request.args @@ -204,7 +217,7 @@ def login(): response.set_cookie(UserSession.user_cookie_name, session_id_signed, max_age=None) return response else: - email_ob = send_verification_email(user_details) + email_ob = send_verification_email(user_details, template_name = "email/user_verification.txt") return render_template("newsecurity/verification_still_needed.html", subject=email_ob['subject']) else: # Incorrect password #ZS: 
It previously seemed to store that there was an incorrect log-in attempt here, but it did so in the MySQL DB so this might need to be reproduced with Redis @@ -374,16 +387,13 @@ def password_reset(): hmac = request.args.get('hm') if verification_code: - user_email = check_verification_code(verification_code) - if user_email: - user_details = get_user_by_unique_column('email_address', user_email) - if user_details: - return render_template( - "new_security/password_reset.html", user_encode=user_details["email_address"]) - else: - flash("Invalid code: User no longer exists!", "error") + user_details = check_verification_code(verification_code) + if user_details: + return render_template( + "new_security/password_reset.html", user_encode=user_details["email_address"]) else: flash("Invalid code: Password reset code does not exist or might have expired!", "error") + return redirect(url_for("login")) else: return redirect(url_for("login")) @@ -394,6 +404,7 @@ def password_reset_step2(): errors = [] user_email = request.form['user_encode'] + user_id = get_user_id("email_address", user_email) password = request.form['password'] encoded_password = set_password(password) @@ -401,9 +412,7 @@ def password_reset_step2(): set_user_attribute(user_id, "password", encoded_password) flash("Password changed successfully. 
You can now sign in.", "alert-info") - response = make_response(redirect(url_for('login'))) - - return response + return redirect(url_for('login')) def register_user(params): thank_you_mode = False diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py index 50419146..ec6d4ae3 100644 --- a/wqflask/wqflask/user_session.py +++ b/wqflask/wqflask/user_session.py @@ -6,10 +6,6 @@ import uuid import simplejson as json -import redis # used for collections -Redis = redis.StrictRedis() - - from flask import (Flask, g, render_template, url_for, request, make_response, redirect, flash, abort) @@ -17,7 +13,8 @@ from wqflask import app from utility import hmac #from utility.elasticsearch_tools import get_elasticsearch_connection -from utility.redis_tools import get_user_id, get_user_by_unique_column, get_user_collections, save_collections +from utility.redis_tools import get_redis_conn, get_user_id, get_user_collections, save_collections +Redis = get_redis_conn() from utility.logger import getLogger logger = getLogger(__name__) @@ -29,6 +26,11 @@ THIRTY_DAYS = 60 * 60 * 24 * 30 def get_user_session(): logger.info("@app.before_request get_session") g.user_session = UserSession() + #ZS: I think this should solve the issue of deleting the cookie and redirecting to the home page when a user's session has expired + if not g.user_session: + response = make_response(redirect(url_for('login'))) + response.set_cookie('session_id_v2', '', expires=0) + return response @app.after_request def set_user_session(response): @@ -37,7 +39,6 @@ def set_user_session(response): response.set_cookie(g.user_session.cookie_name, g.user_session.cookie) return response - def verify_cookie(cookie): the_uuid, separator, the_signature = cookie.partition(':') assert len(the_uuid) == 36, "Is session_id a uuid?" 
@@ -88,14 +89,11 @@ class UserSession(object): user_id = str(uuid.uuid4())) Redis.hmset(self.redis_key, self.record) Redis.expire(self.redis_key, THIRTY_DAYS) - response = make_response(redirect(url_for('login'))) - response.set_cookie(self.user_cookie_name, '', expires=0) ########### Grrr...this won't work because of the way flask handles cookies # Delete the cookie flash("Due to inactivity your session has expired. If you'd like please login again.") - return response - #return + return None else: self.record = dict(login_time = time.time(), user_type = "anon", diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 938570f3..24a4dcee 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -23,16 +23,13 @@ import uuid import simplejson as json import yaml -#Switching from Redis to StrictRedis; might cause some issues -import redis -Redis = redis.StrictRedis() - import flask import base64 import array import sqlalchemy from wqflask import app -from flask import g, Response, request, make_response, render_template, send_from_directory, jsonify, redirect +from flask import g, Response, request, make_response, render_template, send_from_directory, jsonify, redirect, url_for +from wqflask import group_manager from wqflask import search_results from wqflask import export_traits from wqflask import gsearch @@ -55,11 +52,13 @@ from wqflask.correlation import corr_scatter_plot from wqflask.wgcna import wgcna_analysis from wqflask.ctl import ctl_analysis from wqflask.snp_browser import snp_browser -#from wqflask.trait_submission import submit_trait from utility import temp_data from utility.tools import SQL_URI,TEMPDIR,USE_REDIS,USE_GN_SERVER,GN_SERVER_URL,GN_VERSION,JS_TWITTER_POST_FETCHER_PATH,JS_GUIX_PATH, CSS_PATH from utility.helper_functions import get_species_groups +from utility.authentication_tools import check_resource_availability +from utility.redis_tools import get_redis_conn +Redis = get_redis_conn() from base.webqtlConfig 
import GENERATED_IMAGE_DIR from utility.benchmark import Bench @@ -87,6 +86,24 @@ def connect_db(): g.db = g._database = sqlalchemy.create_engine(SQL_URI, encoding="latin1") logger.debug(g.db) +@app.before_request +def check_access_permissions(): + logger.debug("@app.before_request check_access_permissions") + if "temp_trait" in request.args: + if request.args['temp_trait'] == "True": + pass + else: + if 'dataset' in request.args: + dataset = create_dataset(request.args['dataset']) + logger.debug("USER:", Redis.hget("users")) + if 'trait_id' in request.args: + available = check_resource_availability(dataset, request.args['trait_id']) + else: + available = check_resource_availability(dataset) + + if not available: + return redirect(url_for("no_access_page")) + @app.teardown_appcontext def shutdown_session(exception=None): db = getattr(g, '_database', None) @@ -120,6 +137,10 @@ def handle_bad_request(e): resp.set_cookie(err_msg[:32],animation) return resp +@app.route("/authentication_needed") +def no_access_page(): + return render_template("new_security/not_authenticated.html") + @app.route("/") def index_page(): logger.info("Sending index_page") @@ -401,25 +422,43 @@ def export_traits_csv(): def export_perm_data(): """CSV file consisting of the permutation data for the mapping results""" logger.info(request.url) - num_perm = float(request.form['num_perm']) - perm_data = json.loads(request.form['perm_results']) + perm_info = json.loads(request.form['perm_info']) + + now = datetime.datetime.now() + time_str = now.strftime('%H:%M_%d%B%Y') + + file_name = "Permutation_" + perm_info['num_perm'] + "_" + perm_info['trait_name'] + "_" + time_str + + the_rows = [ + ["#Permutation Test"], + ["#File_name: " + file_name], + ["#Metadata: From GeneNetwork.org"], + ["#Trait_ID: " + perm_info['trait_name']], + ["#Trait_description: " + perm_info['trait_description']], + ["#N_permutations: " + str(perm_info['num_perm'])], + ["#Cofactors: " + perm_info['cofactors']], + ["#N_cases: " 
+ str(perm_info['n_samples'])], + ["#N_genotypes: " + str(perm_info['n_genotypes'])], + ["#Genotype_file: " + perm_info['genofile']], + ["#Units_linkage: " + perm_info['units_linkage']], + ["#Permutation_stratified_by: " + ", ".join([ str(cofactor) for cofactor in perm_info['strat_cofactors']])], + ["#RESULTS_1: Suggestive LRS(p=0.63) = " + str(np.percentile(np.array(perm_info['perm_data']), 67))], + ["#RESULTS_2: Significant LRS(p=0.05) = " + str(np.percentile(np.array(perm_info['perm_data']), 95))], + ["#RESULTS_3: Highly Significant LRS(p=0.01) = " + str(np.percentile(np.array(perm_info['perm_data']), 99))], + ["#Comment: Results sorted from low to high peak linkage"] + ] buff = StringIO.StringIO() writer = csv.writer(buff) - writer.writerow(["Suggestive LRS (p=0.63) = " + str(np.percentile(np.array(perm_data), 67))]) - writer.writerow(["Significant LRS (p=0.05) = " + str(np.percentile(np.array(perm_data), 95))]) - writer.writerow(["Highly Significant LRS (p=0.01) = " + str(np.percentile(np.array(perm_data), 99))]) - writer.writerow("") - writer.writerow([str(num_perm) + " Permutations"]) - writer.writerow("") - for item in perm_data: + writer.writerows(the_rows) + for item in perm_info['perm_data']: writer.writerow([item]) csv_data = buff.getvalue() buff.close() return Response(csv_data, mimetype='text/csv', - headers={"Content-Disposition":"attachment;filename=perm_data.csv"}) + headers={"Content-Disposition":"attachment;filename=" + file_name + ".csv"}) @app.route("/show_temp_trait", methods=('POST',)) def show_temp_trait_page(): | 
