aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/wqflask/do_search.py64
-rw-r--r--wqflask/wqflask/search_results.py166
2 files changed, 134 insertions, 96 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 761ae326..6b8dfa41 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -81,16 +81,31 @@ class MrnaAssaySearch(DoSearch):
DoSearch.search_types['ProbeSet'] = "MrnaAssaySearch"
- base_query = """SELECT distinct ProbeSet.Name as TNAME,
- 0 as thistable,
- ProbeSetXRef.Mean as TMEAN,
- ProbeSetXRef.LRS as TLRS,
- ProbeSetXRef.PVALUE as TPVALUE,
- ProbeSet.Chr_num as TCHR_NUM,
- ProbeSet.Mb as TMB,
- ProbeSet.Symbol as TSYMBOL,
- ProbeSet.name_num as TNAME_NUM
- FROM ProbeSetXRef, ProbeSet """
+ base_query = """
+ SELECT
+ ProbeSetFreeze.`Name`,
+ ProbeSetFreeze.`FullName`,
+ ProbeSet.`Name`,
+ ProbeSet.`Symbol`,
+ CAST(ProbeSet.`description` AS BINARY),
+ CAST(ProbeSet.`Probe_Target_Description` AS BINARY),
+ ProbeSet.`Chr`,
+ ProbeSet.`Mb`,
+ ProbeSetXRef.`Mean`,
+ ProbeSetXRef.`LRS`,
+ ProbeSetXRef.`Locus`,
+ ProbeSetXRef.`pValue`,
+ ProbeSetXRef.`additive`,
+ Geno.`Chr` as geno_chr,
+ Geno.`Mb` as geno_mb
+ FROM Species
+ INNER JOIN InbredSet ON InbredSet.`SpeciesId`= Species.`Id`
+ INNER JOIN ProbeFreeze ON ProbeFreeze.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN Tissue ON ProbeFreeze.`TissueId` = Tissue.`Id`
+ INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.`ProbeFreezeId` = ProbeFreeze.`Id`
+ INNER JOIN ProbeSetXRef ON ProbeSetXRef.`ProbeSetFreezeId` = ProbeSetFreeze.`Id`
+ INNER JOIN ProbeSet ON ProbeSet.`Id` = ProbeSetXRef.`ProbeSetId`
+ LEFT JOIN Geno ON ProbeSetXRef.`Locus` = Geno.`Name` AND Geno.`SpeciesId` = Species.`Id` """
header_fields = ['Index',
'Record',
@@ -193,10 +208,25 @@ class PhenotypeSearch(DoSearch):
DoSearch.search_types['Publish'] = "PhenotypeSearch"
base_query = """SELECT PublishXRef.Id,
- PublishFreeze.createtime as thistable,
- Publication.PubMed_ID as Publication_PubMed_ID,
- Phenotype.Post_publication_description as Phenotype_Name
- FROM Phenotype, PublishFreeze, Publication, PublishXRef """
+ CAST(Phenotype.`Pre_publication_description` AS BINARY),
+ CAST(Phenotype.`Post_publication_description` AS BINARY),
+ Publication.`Authors`,
+ Publication.`Year`,
+ Publication.`PubMed_ID`,
+ PublishXRef.`mean`,
+ PublishXRef.`LRS`,
+ PublishXRef.`additive`,
+ PublishXRef.`Locus`,
+ InbredSet.`InbredSetCode`,
+ Geno.`Chr`,
+ Geno.`Mb`
+ FROM Species
+ INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id`
+ INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id`
+ INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId`
+ INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId`
+ LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id """
search_fields = ('Phenotype.Post_publication_description',
'Phenotype.Pre_publication_description',
@@ -382,12 +412,10 @@ class RifSearch(MrnaAssaySearch):
DoSearch.search_types['ProbeSet_RIF'] = "RifSearch"
def get_from_clause(self):
- return ", GeneRIF_BASIC "
+ return f" INNER JOIN GeneRIF_BASIC ON GeneRIF_BASIC.`symbol` = { self.dataset.type }.`symbol` "
def get_where_clause(self):
- where_clause = """( %s.symbol = GeneRIF_BASIC.symbol and
- MATCH (GeneRIF_BASIC.comment)
- AGAINST ('+%s' IN BOOLEAN MODE)) """ % (self.dataset.type, self.search_term[0])
+ where_clause = f"(MATCH (GeneRIF_BASIC.comment) AGAINST ('+{ self.search_term[0] }' IN BOOLEAN MODE)) "
return where_clause
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 78d30ba1..bc0c08a1 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -4,6 +4,7 @@ from math import *
import time
import re
import requests
+from types import SimpleNamespace
from pprint import pformat as pf
@@ -11,6 +12,7 @@ import json
from base.data_set import create_dataset
from base.trait import create_trait
+from base.webqtlConfig import PUBMEDLINK_URL
from wqflask import parser
from wqflask import do_search
from db import webqtlDatabaseFunction
@@ -18,13 +20,13 @@ from db import webqtlDatabaseFunction
from flask import Flask, g
from utility import hmac, helper_functions
+from utility.authentication_tools import check_resource_availability
from utility.tools import GN2_BASE_URL
from utility.type_checking import is_str
from utility.logger import getLogger
logger = getLogger(__name__)
-
class SearchResultPage:
#maxReturn = 3000
@@ -40,9 +42,7 @@ class SearchResultPage:
self.uc_id = uuid.uuid4()
self.go_term = None
- logger.debug("uc_id:", self.uc_id) # contains a unique id
- logger.debug("kw is:", kw) # dict containing search terms
if kw['search_terms_or']:
self.and_or = "or"
self.search_terms = kw['search_terms_or']
@@ -55,7 +55,7 @@ class SearchResultPage:
rx = re.compile(
r'.*\W(href|http|sql|select|update)\W.*', re.IGNORECASE)
if rx.match(search):
- logger.info("Regex failed search")
+ logger.debug("Regex failed search")
self.search_term_exists = False
return
else:
@@ -72,9 +72,8 @@ class SearchResultPage:
assert(is_str(kw.get('dataset')))
self.dataset = create_dataset(kw['dataset'], dataset_type)
- logger.debug("search_terms:", self.search_terms)
- # ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here
+ # I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here
try:
self.search()
except:
@@ -97,76 +96,98 @@ class SearchResultPage:
trait_list = []
json_trait_list = []
- species = webqtlDatabaseFunction.retrieve_species(
- self.dataset.group.name)
# result_set represents the results for each search term; a search of
# "shh grin2b" would have two sets of results, one for each term
+ if self.dataset.type == "ProbeSet":
+ self.header_data_names = ['index', 'display_name', 'symbol', 'description', 'location', 'mean', 'lrs_score', 'lrs_location', 'additive']
+ elif self.dataset.type == "Publish":
+ self.header_data_names = ['index', 'display_name', 'description', 'mean', 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive']
+ elif self.dataset.type == "Geno":
+ self.header_data_names = ['index', 'display_name', 'location']
+
for index, result in enumerate(self.results):
if not result:
continue
- #### Excel file needs to be generated ####
-
trait_dict = {}
- trait_id = result[0]
- this_trait = create_trait(
- dataset=self.dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
- if this_trait:
- trait_dict['index'] = index + 1
- trait_dict['name'] = this_trait.name
- if this_trait.dataset.type == "Publish":
- trait_dict['display_name'] = this_trait.display_name
+ trait_dict['index'] = index + 1
+ trait_dict['name'] = result[0]
+
+ #ZS: Check permissions on a trait-by-trait basis for phenotype traits
+ if self.dataset.type == "Publish":
+ permissions = check_resource_availability(self.dataset, trait_dict['name'])
+ if "view" not in permissions['data']:
+ continue
+
+ trait_dict['display_name'] = result[0]
+ if self.dataset.type == "Publish":
+ if self.dataset.group.code:
+ trait_dict['display_name'] = self.dataset.group.code + "_" + str(result[0])
+
+ trait_dict['dataset'] = self.dataset.name
+ trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait_dict['name'], trait_dict['dataset']))
+ if self.dataset.type == "ProbeSet":
+ trait_dict['symbol'] = "N/A" if result[3] is None else result[3].strip()
+ description_text = "N/A" if result[4] is None or str(result[4]) == "" else trait_dict['symbol']
+
+ target_string = result[5]
+ description_display = description_text if target_string is None or str(target_string) == "" else description_text + "; " + str(target_string).strip()
+ trait_dict['description'] = description_display
+
+ trait_dict['location'] = "N/A"
+ if (result[6] is not None) and (result[6] != "") and (result[7] is not None) and (result[7] != 0):
+ trait_dict['location'] = f"Chr{result[6]}: {float(result[7]):.6f}"
+
+ trait_dict['mean'] = "N/A" if result[8] is None or result[8] == "" else f"{result[8]:.3f}"
+ trait_dict['additive'] = "N/A" if result[12] is None or result[12] == "" else f"{result[12]:.3f}"
+ trait_dict['lod_score'] = "N/A" if result[9] is None or result[9] == "" else f"{float(result[9]) / 4.61:.1f}"
+ trait_dict['lrs_location'] = "N/A" if result[13] is None or result[13] == "" or result[14] is None else f"Chr{result[13]}: {float(result[14]):.6f}"
+ elif self.dataset.type == "Geno":
+ trait_dict['location'] = "N/A"
+ if (result[4] != "NULL" and result[4] != "") and (result[5] != 0):
+ trait_dict['location'] = f"Chr{result[4]}: {float(result[5]):.6f}"
+ elif self.dataset.type == "Publish":
+ trait_dict['description'] = "N/A"
+ trait_dict['pubmed_id'] = "N/A"
+ trait_dict['pubmed_link'] = "N/A"
+ trait_dict['pubmed_text'] = "N/A"
+ trait_dict['mean'] = "N/A"
+ trait_dict['additive'] = "N/A"
+ pre_pub_description = "N/A" if result[1] is None else result[1].strip()
+ post_pub_description = "N/A" if result[2] is None else result[2].strip()
+ if result[5] != "NULL" and result[5] != None:
+ trait_dict['pubmed_id'] = result[5]
+ trait_dict['pubmed_link'] = PUBMEDLINK_URL % trait_dict['pubmed_id']
+ trait_dict['description'] = post_pub_description
else:
- trait_dict['display_name'] = this_trait.name
- trait_dict['dataset'] = this_trait.dataset.name
- trait_dict['hmac'] = hmac.data_hmac(
- '{}:{}'.format(this_trait.name, this_trait.dataset.name))
- if this_trait.dataset.type == "ProbeSet":
- trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A"
- trait_dict['description'] = "N/A"
- if this_trait.description_display:
- trait_dict['description'] = this_trait.description_display
- trait_dict['location'] = this_trait.location_repr
- trait_dict['mean'] = "N/A"
- trait_dict['additive'] = "N/A"
- if this_trait.mean != "" and this_trait.mean != None:
- trait_dict['mean'] = f"{this_trait.mean:.3f}"
- try:
- trait_dict['lod_score'] = f"{float(this_trait.LRS_score_repr) / 4.61:.1f}"
- except:
- trait_dict['lod_score'] = "N/A"
- trait_dict['lrs_location'] = this_trait.LRS_location_repr
- if this_trait.additive != "":
- trait_dict['additive'] = f"{this_trait.additive:.3f}"
- elif this_trait.dataset.type == "Geno":
- trait_dict['location'] = this_trait.location_repr
- elif this_trait.dataset.type == "Publish":
- trait_dict['description'] = "N/A"
- if this_trait.description_display:
- trait_dict['description'] = this_trait.description_display
- trait_dict['authors'] = this_trait.authors
- trait_dict['pubmed_id'] = "N/A"
- if this_trait.pubmed_id:
- trait_dict['pubmed_id'] = this_trait.pubmed_id
- trait_dict['pubmed_link'] = this_trait.pubmed_link
- trait_dict['pubmed_text'] = this_trait.pubmed_text
- trait_dict['mean'] = "N/A"
- if this_trait.mean != "" and this_trait.mean != None:
- trait_dict['mean'] = f"{this_trait.mean:.3f}"
- try:
- trait_dict['lod_score'] = f"{float(this_trait.LRS_score_repr) / 4.61:.1f}"
- except:
- trait_dict['lod_score'] = "N/A"
- trait_dict['lrs_location'] = this_trait.LRS_location_repr
- trait_dict['additive'] = "N/A"
- if this_trait.additive != "":
- trait_dict['additive'] = f"{this_trait.additive:.3f}"
- # Convert any bytes in dict to a normal utf-8 string
- for key in trait_dict.keys():
- if isinstance(trait_dict[key], bytes):
- trait_dict[key] = trait_dict[key].decode('utf-8')
- trait_list.append(trait_dict)
+ trait_dict['description'] = pre_pub_description
+
+ if result[4].isdigit():
+ trait_dict['pubmed_text'] = result[4]
+
+ trait_dict['authors'] = result[3]
+
+ if result[6] != "" and result[6] != None:
+ trait_dict['mean'] = f"{result[6]:.3f}"
+
+ try:
+ trait_dict['lod_score'] = f"{float(result[7]) / 4.61:.1f}"
+ except:
+ trait_dict['lod_score'] = "N/A"
+
+ try:
+ trait_dict['lrs_location'] = f"Chr{result[11]}: {float(result[12]):.6f}"
+ except:
+ trait_dict['lrs_location'] = "N/A"
+
+ trait_dict['additive'] = "N/A" if not result[8] else f"{result[8]:.3f}"
+
+ # Convert any bytes in dict to a normal utf-8 string
+ for key in trait_dict.keys():
+ if isinstance(trait_dict[key], bytes):
+ trait_dict[key] = trait_dict[key].decode('utf-8')
+ trait_list.append(trait_dict)
if self.results:
self.max_widths = {}
@@ -188,29 +209,18 @@ class SearchResultPage:
self.trait_list = trait_list
- if self.dataset.type == "ProbeSet":
- self.header_data_names = ['index', 'display_name', 'symbol', 'description',
- 'location', 'mean', 'lrs_score', 'lrs_location', 'additive']
- elif self.dataset.type == "Publish":
- self.header_data_names = ['index', 'display_name', 'description', 'mean',
- 'authors', 'pubmed_text', 'lrs_score', 'lrs_location', 'additive']
- elif self.dataset.type == "Geno":
- self.header_data_names = ['index', 'display_name', 'location']
-
def search(self):
"""
This function sets up the actual search query in the form of a SQL statement and executes
"""
self.search_terms = parser.parse(self.search_terms)
- logger.debug("After parsing:", self.search_terms)
combined_from_clause = ""
combined_where_clause = ""
# The same table can't be referenced twice in the from clause
previous_from_clauses = []
- logger.debug("len(search_terms)>1")
symbol_list = []
if self.dataset.type == "ProbeSet":
for a_search in self.search_terms: