diff options
-rw-r--r-- | wqflask/wqflask/do_search.py | 64 | ||||
-rw-r--r-- | wqflask/wqflask/search_results.py | 166 |
2 files changed, 134 insertions, 96 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 761ae326..6b8dfa41 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -81,16 +81,31 @@ class MrnaAssaySearch(DoSearch): DoSearch.search_types['ProbeSet'] = "MrnaAssaySearch" - base_query = """SELECT distinct ProbeSet.Name as TNAME, - 0 as thistable, - ProbeSetXRef.Mean as TMEAN, - ProbeSetXRef.LRS as TLRS, - ProbeSetXRef.PVALUE as TPVALUE, - ProbeSet.Chr_num as TCHR_NUM, - ProbeSet.Mb as TMB, - ProbeSet.Symbol as TSYMBOL, - ProbeSet.name_num as TNAME_NUM - FROM ProbeSetXRef, ProbeSet """ + base_query = """ + SELECT + ProbeSetFreeze.`Name`, + ProbeSetFreeze.`FullName`, + ProbeSet.`Name`, + ProbeSet.`Symbol`, + CAST(ProbeSet.`description` AS BINARY), + CAST(ProbeSet.`Probe_Target_Description` AS BINARY), + ProbeSet.`Chr`, + ProbeSet.`Mb`, + ProbeSetXRef.`Mean`, + ProbeSetXRef.`LRS`, + ProbeSetXRef.`Locus`, + ProbeSetXRef.`pValue`, + ProbeSetXRef.`additive`, + Geno.`Chr` as geno_chr, + Geno.`Mb` as geno_mb + FROM Species + INNER JOIN InbredSet ON InbredSet.`SpeciesId`= Species.`Id` + INNER JOIN ProbeFreeze ON ProbeFreeze.`InbredSetId` = InbredSet.`Id` + INNER JOIN Tissue ON ProbeFreeze.`TissueId` = Tissue.`Id` + INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.`ProbeFreezeId` = ProbeFreeze.`Id` + INNER JOIN ProbeSetXRef ON ProbeSetXRef.`ProbeSetFreezeId` = ProbeSetFreeze.`Id` + INNER JOIN ProbeSet ON ProbeSet.`Id` = ProbeSetXRef.`ProbeSetId` + LEFT JOIN Geno ON ProbeSetXRef.`Locus` = Geno.`Name` AND Geno.`SpeciesId` = Species.`Id` """ header_fields = ['Index', 'Record', @@ -193,10 +208,25 @@ class PhenotypeSearch(DoSearch): DoSearch.search_types['Publish'] = "PhenotypeSearch" base_query = """SELECT PublishXRef.Id, - PublishFreeze.createtime as thistable, - Publication.PubMed_ID as Publication_PubMed_ID, - Phenotype.Post_publication_description as Phenotype_Name - FROM Phenotype, PublishFreeze, Publication, PublishXRef """ + CAST(Phenotype.`Pre_publication_description` AS BINARY), + CAST(Phenotype.`Post_publication_description` AS BINARY), + Publication.`Authors`, + Publication.`Year`, + Publication.`PubMed_ID`, + PublishXRef.`mean`, + PublishXRef.`LRS`, + PublishXRef.`additive`, + PublishXRef.`Locus`, + InbredSet.`InbredSetCode`, + Geno.`Chr`, + Geno.`Mb` + FROM Species + INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id` + INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id` + INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id` + INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId` + INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId` + LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id """ search_fields = ('Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', @@ -382,12 +412,10 @@ class RifSearch(MrnaAssaySearch): DoSearch.search_types['ProbeSet_RIF'] = "RifSearch" def get_from_clause(self): - return ", GeneRIF_BASIC " + return f" INNER JOIN GeneRIF_BASIC ON GeneRIF_BASIC.`symbol` = { self.dataset.type }.`symbol` " def get_where_clause(self): - where_clause = """( %s.symbol = GeneRIF_BASIC.symbol and - MATCH (GeneRIF_BASIC.comment) - AGAINST ('+%s' IN BOOLEAN MODE)) """ % (self.dataset.type, self.search_term[0]) + where_clause = f"(MATCH (GeneRIF_BASIC.comment) AGAINST ('+{ self.search_term[0] }' IN BOOLEAN MODE)) " return where_clause diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 78d30ba1..bc0c08a1 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -4,6 +4,7 @@ from math import * import time import re import requests +from types import SimpleNamespace from pprint import pformat as pf @@ -11,6 +12,7 @@ import json from base.data_set import create_dataset from base.trait import create_trait +from base.webqtlConfig import PUBMEDLINK_URL from wqflask import parser from wqflask import do_search from db import webqtlDatabaseFunction @@ -18,13 +20,13 @@ from db import webqtlDatabaseFunction from flask import Flask, g from utility import hmac, helper_functions +from utility.authentication_tools import check_resource_availability from utility.tools import GN2_BASE_URL from utility.type_checking import is_str from utility.logger import getLogger logger = getLogger(__name__) - class SearchResultPage: #maxReturn = 3000 @@ -40,9 +42,7 @@ class SearchResultPage: self.uc_id = uuid.uuid4() self.go_term = None - logger.debug("uc_id:", self.uc_id) # contains a unique id - logger.debug("kw is:", kw) # dict containing search terms if kw['search_terms_or']: self.and_or = "or" self.search_terms = kw['search_terms_or'] @@ -55,7 +55,7 @@ class SearchResultPage: rx = re.compile( r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE) if rx.match(search): - logger.info("Regex failed search") + logger.debug("Regex failed search") self.search_term_exists = False return else: @@ -72,9 +72,8 @@ class SearchResultPage: assert(is_str(kw.get('dataset'))) self.dataset = create_dataset(kw['dataset'], dataset_type) - logger.debug("search_terms:", self.search_terms) - # ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here + # I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here try: self.search() except: @@ -97,76 +96,98 @@ class SearchResultPage: trait_list = [] json_trait_list = [] - species = webqtlDatabaseFunction.retrieve_species( - self.dataset.group.name) # result_set represents the results for each search term; a search of # "shh grin2b" would have two sets of results, one for each term + if self.dataset.type == "ProbeSet": + self.header_data_names = ['index', 'display_name', 'symbol', 'description', 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] + elif self.dataset.type == "Publish": + self.header_data_names = ['index', 'display_name', 'description', 'mean', 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] + elif self.dataset.type == "Geno": + self.header_data_names = ['index', 'display_name', 'location'] + for index, result in enumerate(self.results): if not result: continue - #### Excel file needs to be generated #### - trait_dict = {} - trait_id = result[0] - this_trait = create_trait( - dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) - if this_trait: - trait_dict['index'] = index + 1 - trait_dict['name'] = this_trait.name - if this_trait.dataset.type == "Publish": - trait_dict['display_name'] = this_trait.display_name + trait_dict['index'] = index + 1 + trait_dict['name'] = result[0] + + #ZS: Check permissions on a trait-by-trait basis for phenotype traits + if self.dataset.type == "Publish": + permissions = check_resource_availability(self.dataset, trait_dict['name']) + if "view" not in permissions['data']: + continue + + trait_dict['display_name'] = result[0] + if self.dataset.type == "Publish": + if self.dataset.group.code: + trait_dict['display_name'] = self.dataset.group.code + "_" + str(result[0]) + + trait_dict['dataset'] = self.dataset.name + trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait_dict['name'], trait_dict['dataset'])) + if self.dataset.type == "ProbeSet": + trait_dict['symbol'] = "N/A" if result[3] is None else result[3].strip() + description_text = "N/A" if result[4] is None or str(result[4]) == "" else trait_dict['symbol'] + + target_string = result[5] + description_display = description_text if target_string is None or str(target_string) == "" else description_text + "; " + str(target_string).strip() + trait_dict['description'] = description_display + + trait_dict['location'] = "N/A" + if (result[6] is not None) and (result[6] != "") and (result[7] is not None) and (result[7] != 0): + trait_dict['location'] = f"Chr{result[6]}: {float(result[7]):.6f}" + + trait_dict['mean'] = "N/A" if result[8] is None or result[8] == "" else f"{result[8]:.3f}" + trait_dict['additive'] = "N/A" if result[12] is None or result[12] == "" else f"{result[12]:.3f}" + trait_dict['lod_score'] = "N/A" if result[9] is None or result[9] == "" else f"{float(result[9]) / 4.61:.1f}" + trait_dict['lrs_location'] = "N/A" if result[13] is None or result[13] == "" or result[14] is None else f"Chr{result[13]}: {float(result[14]):.6f}" + elif self.dataset.type == "Geno": + trait_dict['location'] = "N/A" + if (result[4] != "NULL" and result[4] != "") and (result[5] != 0): + trait_dict['location'] = f"Chr{result[4]}: {float(result[5]):.6f}" + elif self.dataset.type == "Publish": + trait_dict['description'] = "N/A" + trait_dict['pubmed_id'] = "N/A" + trait_dict['pubmed_link'] = "N/A" + trait_dict['pubmed_text'] = "N/A" + trait_dict['mean'] = "N/A" + trait_dict['additive'] = "N/A" + pre_pub_description = "N/A" if result[1] is None else result[1].strip() + post_pub_description = "N/A" if result[2] is None else result[2].strip() + if result[5] != "NULL" and result[5] != None: + trait_dict['pubmed_id'] = result[5] + trait_dict['pubmed_link'] = PUBMEDLINK_URL % trait_dict['pubmed_id'] + trait_dict['description'] = post_pub_description else: - trait_dict['display_name'] = this_trait.name - trait_dict['dataset'] = this_trait.dataset.name - trait_dict['hmac'] = hmac.data_hmac( - '{}:{}'.format(this_trait.name, this_trait.dataset.name)) - if this_trait.dataset.type == "ProbeSet": - trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A" - trait_dict['description'] = "N/A" - if this_trait.description_display: - trait_dict['description'] = this_trait.description_display - trait_dict['location'] = this_trait.location_repr - trait_dict['mean'] = "N/A" - trait_dict['additive'] = "N/A" - if this_trait.mean != "" and this_trait.mean != None: - trait_dict['mean'] = f"{this_trait.mean:.3f}" - try: - trait_dict['lod_score'] = f"{float(this_trait.LRS_score_repr) / 4.61:.1f}" - except: - trait_dict['lod_score'] = "N/A" - trait_dict['lrs_location'] = this_trait.LRS_location_repr - if this_trait.additive != "": - trait_dict['additive'] = f"{this_trait.additive:.3f}" - elif this_trait.dataset.type == "Geno": - trait_dict['location'] = this_trait.location_repr - elif this_trait.dataset.type == "Publish": - trait_dict['description'] = "N/A" - if this_trait.description_display: - trait_dict['description'] = this_trait.description_display - trait_dict['authors'] = this_trait.authors - trait_dict['pubmed_id'] = "N/A" - if this_trait.pubmed_id: - trait_dict['pubmed_id'] = this_trait.pubmed_id - trait_dict['pubmed_link'] = this_trait.pubmed_link - trait_dict['pubmed_text'] = this_trait.pubmed_text - trait_dict['mean'] = "N/A" - if this_trait.mean != "" and this_trait.mean != None: - trait_dict['mean'] = f"{this_trait.mean:.3f}" - try: - trait_dict['lod_score'] = f"{float(this_trait.LRS_score_repr) / 4.61:.1f}" - except: - trait_dict['lod_score'] = "N/A" - trait_dict['lrs_location'] = this_trait.LRS_location_repr - trait_dict['additive'] = "N/A" - if this_trait.additive != "": - trait_dict['additive'] = f"{this_trait.additive:.3f}" - # Convert any bytes in dict to a normal utf-8 string - for key in trait_dict.keys(): - if isinstance(trait_dict[key], bytes): - trait_dict[key] = trait_dict[key].decode('utf-8') - trait_list.append(trait_dict) + trait_dict['description'] = pre_pub_description + + if result[4].isdigit(): + trait_dict['pubmed_text'] = result[4] + + trait_dict['authors'] = result[3] + + if result[6] != "" and result[6] != None: + trait_dict['mean'] = f"{result[6]:.3f}" + + try: + trait_dict['lod_score'] = f"{float(result[7]) / 4.61:.1f}" + except: + trait_dict['lod_score'] = "N/A" + + try: + trait_dict['lrs_location'] = f"Chr{result[11]}: {float(result[12]):.6f}" + except: + trait_dict['lrs_location'] = "N/A" + + trait_dict['additive'] = "N/A" if not result[8] else f"{result[8]:.3f}" + + # Convert any bytes in dict to a normal utf-8 string + for key in trait_dict.keys(): + if isinstance(trait_dict[key], bytes): + trait_dict[key] = trait_dict[key].decode('utf-8') + trait_list.append(trait_dict) if self.results: self.max_widths = {} @@ -188,29 +209,18 @@ class SearchResultPage: self.trait_list = trait_list - if self.dataset.type == "ProbeSet": - self.header_data_names = ['index', 'display_name', 'symbol', 'description', - 'location', 'mean', 'lrs_score', 'lrs_location', 'additive'] - elif self.dataset.type == "Publish": - self.header_data_names = ['index', 'display_name', 'description', 'mean', - 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive'] - elif self.dataset.type == "Geno": - self.header_data_names = ['index', 'display_name', 'location'] - def search(self): """ This function sets up the actual search query in the form of a SQL statement and executes """ self.search_terms = parser.parse(self.search_terms) - logger.debug("After parsing:", self.search_terms) combined_from_clause = "" combined_where_clause = "" # The same table can't be referenced twice in the from clause previous_from_clauses = [] - logger.debug("len(search_terms)>1") symbol_list = [] if self.dataset.type == "ProbeSet": for a_search in self.search_terms: |