From ea46f42ee640928b92947bfb204c41a482d80937 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 8 May 2012 18:39:56 -0500 Subject: Add all the source codes into the github. --- web/webqtl/search/IndexPage.py | 41 ++ web/webqtl/search/SearchResultPage.py | 1237 +++++++++++++++++++++++++++++++++ web/webqtl/search/TextSearchPage.py | 536 ++++++++++++++ web/webqtl/search/__init__.py | 0 web/webqtl/search/pubmedsearch.py | 12 + 5 files changed, 1826 insertions(+) create mode 100755 web/webqtl/search/IndexPage.py create mode 100644 web/webqtl/search/SearchResultPage.py create mode 100755 web/webqtl/search/TextSearchPage.py create mode 100755 web/webqtl/search/__init__.py create mode 100755 web/webqtl/search/pubmedsearch.py (limited to 'web/webqtl/search') diff --git a/web/webqtl/search/IndexPage.py b/web/webqtl/search/IndexPage.py new file mode 100755 index 00000000..ddea19f4 --- /dev/null +++ b/web/webqtl/search/IndexPage.py @@ -0,0 +1,41 @@ +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License +# as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Affero General Public License for more details. +# +# This program is available from Source Forge: at GeneNetwork Project +# (sourceforge.net/projects/genenetwork/). +# +# Contact Drs. Robert W. 
Williams and Xiaodong Zhou (2010) +# at rwilliams@uthsc.edu and xzhou15@uthsc.edu +# +# +# +# This module is used by GeneNetwork project (www.genenetwork.org) +# +# Created by GeneNetwork Core Team 2010/08/10 +# +# Last updated by GeneNetwork Core Team 2010/10/20 + +from base.templatePage import templatePage +from base import indexBody + +######################################### +# IndexPage +######################################### + +class IndexPage(templatePage): + + def __init__(self, fd): + templatePage.__init__(self, fd) + self.dict['title'] = 'GeneNetwork' + self.dict['body'] = indexBody.index_body_string + self.dict['js1'] = '' + self.dict['js2'] = 'onload="javascript:initialDatasetSelection();"' diff --git a/web/webqtl/search/SearchResultPage.py b/web/webqtl/search/SearchResultPage.py new file mode 100644 index 00000000..14d10731 --- /dev/null +++ b/web/webqtl/search/SearchResultPage.py @@ -0,0 +1,1237 @@ +import string +import os +import cPickle +import re +from math import * +import time +import pyXLWriter as xl +import pp +import math +import datetime + +from htmlgen import HTMLgen2 as HT + +from base import webqtlConfig +from utility.THCell import THCell +from utility.TDCell import TDCell +from base.webqtlDataset import webqtlDataset +from base.webqtlTrait import webqtlTrait +from base.templatePage import templatePage +from utility import webqtlUtil +from dbFunction import webqtlDatabaseFunction + +import logging +logging.basicConfig(filename="/tmp/gn_log", level=logging.INFO) +_log = logging.getLogger("search") + +class SearchResultPage(templatePage): + + maxReturn = 3000 +# NPerPage = 100 + nkeywords = 0 + + def __init__(self, fd): + + templatePage.__init__(self, fd) + + if not self.openMysql(): + return + + self.dict['title'] = 'Search Results' + TD_LR = HT.TD(height=200,width="100%",bgColor='#eeeeee',valign="top") + self.database = fd.formdata.getfirst('database', '') + if not self.database or self.database == 'spacer': + #Error, No database 
selected + heading = "Search Result" + detail = ['''No database was selected for this search, please + go back and SELECT at least one database.'''] + self.error(heading=heading,detail=detail,error="No Database Selected") + return + elif type(self.database) == type(""): + #convert database into a database list + #was used for multiple databases search, this + #feature has been abandoned, + self.database = string.split(self.database,',') + else: + pass + + ########################################### + # Names and IDs of RISet / F2 set + ########################################### + if self.database == ['_allPublish']: + self.cursor.execute("""select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze, + InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = + PublishFreeze.InbredSetId""") + results = self.cursor.fetchall() + self.database = map(lambda x: webqtlDataset(x[0], self.cursor), results) + self.databaseCrosses = map(lambda x: x[1], results) + self.databaseCrossIds = map(lambda x: x[2], results) + self.singleCross = False + else: + self.database = map(lambda x: webqtlDataset(x, self.cursor), self.database) + #currently, webqtl wouldn't allow multiple crosses + #for other than multiple publish db search + #so we can use the first database as example + if self.database[0].type=="Publish": + pass + elif self.database[0].type in ("Geno", "ProbeSet"): + + #userExist = None + + for individualDB in self.database: + self.cursor.execute('SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %sFreeze WHERE Name = "%s"' % (self.database[0].type, individualDB)) + indId, indName, indFullName, confidential, AuthorisedUsers = self.cursor.fetchall()[0] + + if confidential == 1: + access_to_confidential_dataset = 0 + + #for the dataset that confidentiality is 1 + #1. 'admin' and 'root' can see all of the dataset + #2. 
'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table) + if webqtlConfig.USERDICT[self.privilege] > webqtlConfig.USERDICT['user']: + access_to_confidential_dataset = 1 + else: + AuthorisedUsersList=AuthorisedUsers.split(',') + if AuthorisedUsersList.__contains__(self.userName): + access_to_confidential_dataset = 1 + + if not access_to_confidential_dataset: + #Error, No database selected + heading = "Search Result" + detail = ["The %s database you selected is not open to the public at this time, please go back and SELECT other database." % indFullName] + self.error(heading=heading,detail=detail,error="Confidential Database") + return + else: + heading = "Search Result" + detail = ['''The database has not been established yet, please + go back and SELECT at least one database.'''] + self.error(heading=heading,detail=detail,error="No Database Selected") + return + + self.database[0].getRISet() + self.databaseCrosses = [self.database[0].riset] + self.databaseCrossIds = [self.database[0].risetid] + self.singleCross = True + #XZ, August 24,2010: Since self.singleCross = True, it's safe to assign one species Id. 
+ self.speciesId = webqtlDatabaseFunction.retrieveSpeciesId(self.cursor, self.database[0].riset) + + ########################################### + # make sure search from same type of databases + ########################################### + dbTypes = map(lambda X: X.type, self.database) + self.dbType = dbTypes[0] + for item in dbTypes: + if item != self.dbType: + heading = "Search Result" + detail = ["Search can only be performed among the same type of databases"] + self.error(heading=heading,detail=detail,error="Error") + return + if self.dbType == "Publish": + self.searchField = ['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', 'Phenotype.Lab_code', 'Publication.PubMed_ID', 'Publication.Abstract', 'Publication.Title', 'Publication.Authors', 'PublishXRef.Id'] + + elif self.dbType == "ProbeSet": + self.searchField = ['Name','Description','Probe_Target_Description','Symbol','Alias','GenbankId', 'UniGeneId','RefSeq_TranscriptId'] + elif self.dbType == "Geno": + self.searchField = ['Name','Chr'] + + ########################################### + # Search Options + ########################################### + self.matchwhole = fd.formdata.getfirst('matchwhole') + #split result into pages + self.pageNumber = fd.formdata.getfirst('pageno', '0') + try: + self.pageNumber = int(self.pageNumber) + except: + self.pageNumber = 0 + + + ########################################### + # Generate Mysql Query + ########################################### + geneIdListQuery = fd.formdata.getfirst('geneId', '') + if geneIdListQuery: + geneIdListQuery = string.replace(geneIdListQuery, ",", " ") + geneIdListQuery = " geneId=%s" % string.join(string.split(geneIdListQuery), "-") + + self.ANDkeyword = fd.formdata.getfirst('ANDkeyword', "") + self.ORkeyword = fd.formdata.getfirst('ORkeyword', "") + + self.ORkeyword += geneIdListQuery + + self.ANDkeyword = 
self.ANDkeyword.replace("\\", "").strip() + self.ORkeyword = self.ORkeyword.replace("\\", "").strip() + #user defined sort option + self.orderByUserInput = fd.formdata.getfirst('orderByUserInput', "").strip() + #default sort option if user have not defined + self.orderByDefalut = "" + + #XZ, Dec/16/2010: I add examples to help understand this block of code. See details in function pattersearch. + + #XZ: self._1mPattern examples: WIKI=xxx, RIF=xxx, GO:0045202 + self._1mPattern = re.compile('\s*(\S+)\s*[:=]\s*([a-zA-Z-\+\d\.]+)\s*') + + #XZ: self._2mPattern examples: Mean=(15.0 16.0), Range=(10 100), LRS=(Low_LRS_limit, High_LRS_limit), pvalue=(Low_limit, High_limit), Range=(10 100) + self._2mPattern = re.compile('\s*(\S+)\s*[=in]{1,2}\s*\(\s*([-\d\.]+)[, \t]+([-\d\.]+)[, \t]*([-\d\.]*)\s*\)') + + #XZ: self._3mPattern examples: Position=(Chr1 98 104), Pos=(Chr1 98 104), Mb=(Chr1 98 104), CisLRS=(Low_LRS_limit, High_LRS_limit, Mb_buffer), TransLRS=(Low_LRS_limit, High_LRS_limit, Mb_buffer) + self._3mPattern = re.compile('\s*(\S+)\s*[=in]{1,2}\s*\(\s*[Cc][Hh][Rr]([^, \t]+)[, \t]+([-\d\.]+)[, \t]+([-\d\.]+)\s*\)') + + #XZ: self._5mPattern examples: LRS=(Low_LRS_limit, High_LRS_limit, ChrNN, Mb_Low_Limit, Mb_High_Limit) + self._5mPattern = re.compile('\s*(\S+)\s*[=in]{1,2}\s*\(\s*([-\d\.]+)[, \t]+([-\d\.]+)[, \t]+[Cc][Hh][Rr]([^, \t]+)[, \t]+([-\d\.]+)[, \t]+([-\d\.]+)\s*\)') + + #Error, No keyword input + if not (self.ORkeyword or self.ANDkeyword): + heading = "Search Result" + detail = ["Please make sure to enter either your search terms (genes, traits, markers), or advanced search commands."] + self.error(heading=heading,detail=detail,error="No search terms were entered") + return + + #query clauses + self.ANDQuery = [] + self.ORQuery = [] + #descriptions, one for OR search, one for AND search + self.ANDDescriptionText = [] + self.ORDescriptionText = [] + + if not self.normalSearch(): + return + if not self.patternSearch(): + return + if not self.assembleQuery(): + 
return + self.nresults = self.executeQuery() + + if len(self.database) > 1: + dbUrl = "Multiple phenotype databases" + dbUrlLink = " were" + else: + dbUrl = self.database[0].genHTML() + dbUrlLink = " was" + + SearchText = HT.Blockquote('GeneNetwork searched the ', dbUrl, ' for all records ') + if self.ORkeyword2: + NNN = len(self.ORkeyword2) + if NNN > 1: + SearchText.append(' that match the terms ') + else: + SearchText.append(' that match the term ') + for j, term in enumerate(self.ORkeyword2): + SearchText.append(HT.U(term)) + if NNN > 1 and j < NNN-2: + SearchText.append(", ") + elif j == NNN-2: + SearchText.append(", or ") + else: + pass + if self.ORDescriptionText: + if self.ORkeyword2: + SearchText.append("; ") + else: + SearchText.append(" ") + for j, item in enumerate(self.ORDescriptionText): + SearchText.append(item) + if j < len(self.ORDescriptionText) -1: + SearchText.append(";") + + if (self.ORkeyword2 or self.ORDescriptionText) and (self.ANDkeyword2 or self.ANDDescriptionText): + SearchText.append("; ") + if self.ANDkeyword2: + if (self.ORkeyword2 or self.ORDescriptionText): + SearchText.append(' records') + NNN = len(self.ANDkeyword2) + if NNN > 1: + SearchText.append(' that match the terms ') + else: + SearchText.append(' that match the term ') + for j, term in enumerate(self.ANDkeyword2): + SearchText.append(HT.U(term)) + if NNN > 1 and j < NNN-2: + SearchText.append(", ") + elif j == NNN-2: + SearchText.append(", and ") + else: + pass + if self.ANDDescriptionText: + if self.ANDkeyword2: + SearchText.append(" and ") + else: + SearchText.append(" ") + for j, item in enumerate(self.ANDDescriptionText): + SearchText.append(item) + if j < len(self.ANDDescriptionText) -1: + SearchText.append(" and ") + + SearchText.append(". ") + if self.nresults == 0: + heading = "Search Result" + detail = ["Sorry, GeneNetwork did not find any records matching your request. 
Please check the syntax or try the ANY rather than the ALL field."] + self.error(heading=heading,intro = SearchText.contents,detail=detail,error="Not Found") + return + elif self.nresults == 1: + SearchText.append(HT.P(), 'GeneNetwork found one record that matches your request. To study this record, click on its text below. To add this record to your Selection window, use the checkbox and then click the ', HT.Strong('Add to Collection'),' button. ') + elif self.nresults >= 1 and self.nresults <= self.maxReturn: + SearchText.append(HT.P(), 'GeneNetwork found a total of ', HT.Span(self.nresults, Class='fwb cr'), ' records. To study any one of these records, click on its ID below. To add one or more records to your Selection window, use the checkbox and then click the ' , HT.Strong('Add to Collection'),' button. ') + else: + SearchText.append(' A total of ',HT.Span(self.nresults, Class='fwb cr'), ' records were found.') + heading = "Search Result" + # Modified by Hongqiang Li + # detail = ["The terms you entered match %d records. Please modify your search to generate %d or fewer matches, or review " % (self.nresults, self.maxReturn), HT.Href(text='Search Help', target='_blank', url='http://web2qtl.utmem.edu/searchHelp.html', Class='fs14'), " to learn more about syntax and the use of wildcard characters."] + detail = ["The terms you entered match %d records. 
Please modify your search to generate %d or fewer matches, or review " % (self.nresults, self.maxReturn), HT.Href(text='Search Help', target='_blank', url='%s/searchHelp.html' % webqtlConfig.PORTADDR, Class='fs14'), " to learn more about syntax and the use of wildcard characters."] + # + self.error(heading=heading,intro = SearchText.contents,detail=detail,error="Over %d" % self.maxReturn) + return + + + TD_LR.append(HT.Paragraph('Search Results', Class="title"), SearchText) + self.genSearchResultTable(TD_LR) + self.dict['body'] = str(TD_LR) + self.dict['js1'] = '' + self.dict['js2'] = 'onLoad="pageOffset()"' + self.dict['layer'] = self.generateWarningLayer() + + def genSearchResultTable(self, TD_LR): + + pageTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="100%",border=0) + + lastone = False + for i, item in enumerate(self.results): + if not item: + continue + lastone = False + + traitList = [] + for k, item2 in enumerate(item): + j, ProbeSetID = item2[:2] + thisTrait = webqtlTrait(db=self.database[j], name=ProbeSetID, cursor=self.cursor) + traitList.append(thisTrait) + + ############## + # Excel file # + ############## + filename= webqtlUtil.genRandStr("Search_") + xlsUrl = HT.Input(type='button', value = 'Download Table', onClick= "location.href='/tmp/%s.xls'" % filename, Class='button') + # Create a new Excel workbook + workbook = xl.Writer('%s.xls' % (webqtlConfig.TMPDIR+filename)) + headingStyle = workbook.add_format(align = 'center', bold = 1, border = 1, size=13, fg_color = 0x1E, color="white") + + #XZ, 3/18/2010: pay attention to the line number of header in this file. As of today, there are 7 lines. 
+ worksheet = self.createExcelFileWithTitleAndFooter(workbook=workbook, db=thisTrait.db, returnNumber=len(traitList)) + newrow = 7 + + tbl = HT.TableLite(cellSpacing=2,cellPadding=0,width="90%",border=0) + #seq = self.pageNumber*self.NPerPage+1 //Edited out because we show all results in one page now - Zach 2/22/11 + seq = 1 + RISet = self.databaseCrosses[i] + thisFormName = 'showDatabase'+RISet + selectall = HT.Href(url="#", onClick="checkAll(document.getElementsByName('%s')[0]);" % thisFormName) + selectall_img = HT.Image("/images/select_all2_final.jpg", name="selectall", alt="Select All", title="Select All", style="border:none;") + selectall.append(selectall_img) + reset = HT.Href(url="#", onClick="checkNone(document.getElementsByName('%s')[0]);" % thisFormName) + reset_img = HT.Image("/images/select_none2_final.jpg", alt="Select None", title="Select None", style="border:none;") + reset.append(reset_img) + selectinvert = HT.Href(url="#", onClick="checkInvert(document.getElementsByName('%s')[0]);" % thisFormName) + selectinvert_img = HT.Image("/images/invert_selection2_final.jpg", name="selectinvert", alt="Invert Selection", title="Invert Selection", style="border:none;") + selectinvert.append(selectinvert_img) + addselect = HT.Href(url="#") + addselect_img = HT.Image("/images/add_collection1_final.jpg", name="addselect", alt="Add To Collection", title="Add To Collection", style="border:none;") + addselect.append(addselect_img) + + optionsTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="20%",border=0) + optionsRow = HT.TR(HT.TD(selectall, width="25%"), HT.TD(reset, width="25%"), HT.TD(selectinvert, width="25%"), HT.TD(addselect, width="25%")) + labelsRow = HT.TR(HT.TD(" "*2,"Select", width="25%"), HT.TD(" ","Deselect", width="255"), HT.TD(" "*3,"Invert", width="25%"), HT.TD(" "*4,"Add", width="25%")) + optionsTable.append(optionsRow, labelsRow) + + pageTable.append(HT.TR(HT.TD(optionsTable)), HT.TR(HT.TD(xlsUrl, height=40))) + + tblobj = {} + mainfmName = 
thisFormName + species = webqtlDatabaseFunction.retrieveSpecies(cursor=self.cursor, RISet=RISet) + + if thisTrait.db.type=="Geno": + tblobj['header'] = self.getTableHeaderForGeno(worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + + newrow += 1 + + sortby = self.getSortByValue(datasetType="Geno") + + tblobj['body'] = self.getTableBodyForGeno(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow) + + workbook.close() + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + + div = HT.Div(webqtlUtil.genTableObj(tblobj, filename, sortby), Id="sortable") + + pageTable.append(HT.TR(HT.TD(div))) + + elif thisTrait.db.type=="Publish": + tblobj['header'] = self.getTableHeaderForPublish(worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + + newrow += 1 + + sortby = self.getSortByValue(datasetType="Publish") + + tblobj['body'] = self.getTableBodyForPublish(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, species=species) + + workbook.close() + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + + div = HT.Div(webqtlUtil.genTableObj(tblobj, filename, sortby), Id="sortable") + + pageTable.append(HT.TR(HT.TD(div))) + + elif thisTrait.db.type=="ProbeSet": + tblobj['header'] = self.getTableHeaderForProbeSet(worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) + + newrow += 1 + + sortby = self.getSortByValue(datasetType="ProbeSet") + + tblobj['body'] = self.getTableBodyForProbeSet(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, species=species) + + workbook.close() + objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') + cPickle.dump(tblobj, objfile) + objfile.close() + + div = HT.Div(webqtlUtil.genTableObj(tblobj, filename, sortby), Id="sortable") + + pageTable.append(HT.TR(HT.TD(div))) + + + traitForm = HT.Form(cgi= 
os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name=thisFormName, submit=HT.Input(type='hidden')) + hddn = {'FormID':'showDatabase','ProbeSetID':'_','database':'_','CellID':'_','RISet':RISet} + hddn['incparentsf1']='ON' + for key in hddn.keys(): + traitForm.append(HT.Input(name=key, value=hddn[key], type='hidden')) + + traitForm.append(HT.P(),pageTable) + + TD_LR.append(traitForm) + if len(self.results) > 1 and i < len(self.results) - 1: + lastone = True + if lastone: + TD_LR.contents.pop() + + def executeQuery(self): + + ##construct sorting + if self.dbType == "Publish": + sortQuery = " order by Publication_PubMed_ID desc, Phenotype_Name, thistable" + elif self.dbType == "Geno": + if not self.orderByUserInput: + if self.orderByDefalut: + self.orderByUserInput = self.orderByDefalut + else: + self.orderByUserInput = "POSITION" + if self.orderByUserInput.upper() in ["POS", "POSITION", "MB"]: + self.orderByUserInput = "POSITION" + else: + pass + self.orderByUserInput = self.orderByUserInput.upper() + self.orderByUserInputOrig = self.orderByUserInput[:] + if self.orderByUserInput == "NAME": + sortQuery = " order by Geno_Name, Geno_chr_num, Geno_Mb" + elif self.orderByUserInput == "SOURCE": + sortQuery = " order by Geno_Source2, Geno_chr_num, Geno_Mb" + else: + sortQuery = " order by Geno_chr_num, Geno_Mb" + #ProbeSet + else: + if not self.orderByUserInput: + if self.orderByDefalut: + self.orderByUserInput = self.orderByDefalut + else: + self.orderByUserInput = "POSITION" + + self.orderByUserInput = self.orderByUserInput.upper() + self.orderByUserInputOrig = self.orderByUserInput[:] + #XZ: 8/18/2009: "POSITION-" + if self.orderByUserInput[-1] == '-': + self.orderByUserInput = self.orderByUserInput[:-1] + sortDesc = 'desc' + else: + sortDesc = '' + + if self.orderByUserInput in ["MEAN", "LRS", "PVALUE"]: + #sortQuery = " order by T%s %s, TNAME, thistable desc" % (self.orderByUserInput, sortDesc) + sortQuery = " order by T%s 
desc, TNAME, thistable desc" % self.orderByUserInput + elif self.orderByUserInput in ["POS", "POSITION", "MB"]: + sortQuery = " order by TCHR_NUM %s, TMB %s, TNAME, thistable desc" % (sortDesc, sortDesc) + elif self.orderByUserInput == 'SYMBOL': + sortQuery = " order by TSYMBOL, thistable desc" + else: + sortQuery = " order by TNAME_NUM, thistable desc" + + if self.singleCross: + if len(self.query) > 1: + searchQuery = map(lambda X:'(%s)' % X, self.query) + searchQuery = string.join(searchQuery, ' UNION ALL ') + else: + searchQuery = self.query[0] + searchQuery += sortQuery + #searchCountQuery retrieve all the results + searchCountQuery = [searchQuery] + #searchQuery = searchQuery + " limit %d,%d" % (self.pageNumber*self.NPerPage, self.NPerPage) // We removed the page limit - Zach 2/22/11 + searchQuery = [searchQuery] + else: + searchCountQuery = searchQuery = map(lambda X: X+sortQuery, self.query) + + allResults = [] + self.results = [] + for item in searchCountQuery: + start_time = datetime.datetime.now() + _log.info("Executing query: %s"%(item)) + self.cursor.execute(item) + allResults.append(self.cursor.fetchall()) + end_time = datetime.datetime.now() + _log.info("Total time: %s"%(end_time-start_time)) + + _log.info("Done executing queries") + + + #searchCountQuery retrieve all the results, for counting use only + if searchCountQuery != searchQuery: + for item in searchQuery: + self.cursor.execute(item) + self.results.append(self.cursor.fetchall()) + else: + self.results = allResults + + nresults = reduce(lambda Y,X:len(X)+Y, allResults, 0) + return nresults + + + + def assembleQuery(self): + self.query = [] + if self.ANDQuery or self.ORQuery: + clause = self.ORQuery[:] + + for j, database in enumerate(self.database): + if self.ANDQuery: + clause.append(" (%s) " % string.join(self.ANDQuery, " AND ")) + + newclause = [] + + for item in clause: + ##need to retrieve additional field which won't be used + ##in the future, for sorting purpose only + if self.dbType 
== "Publish": + if item.find("Geno.name") < 0: + incGenoTbl = "" + else: + incGenoTbl = " Geno, " + newclause.append("SELECT %d, PublishXRef.Id, PublishFreeze.createtime as thistable, Publication.PubMed_ID as Publication_PubMed_ID, Phenotype.Post_publication_description as Phenotype_Name FROM %s PublishFreeze, Publication, PublishXRef, Phenotype WHERE PublishXRef.InbredSetId = %d and %s and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishFreeze.Id = %d" % (j, incGenoTbl, self.databaseCrossIds[j], item, database.id)) + elif self.dbType == "ProbeSet": + if item.find("GOgene") < 0: + incGoTbl = "" + else: + incGoTbl = " ,db_GeneOntology.term as GOterm, db_GeneOntology.association as GOassociation, db_GeneOntology.gene_product as GOgene_product " + if item.find("Geno.name") < 0: + incGenoTbl = "" + else: + incGenoTbl = " Geno, " + if item.find("GeneRIF_BASIC.") < 0: + incGeneRIFTbl = "" + else: + incGeneRIFTbl = " GeneRIF_BASIC, " + if item.find("GeneRIF.") < 0: + incGeneRIFTbl += "" + else: + incGeneRIFTbl += " GeneRIF, " + newclause.append("""SELECT distinct %d, ProbeSet.Name as TNAME, 0 as thistable, + ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS, ProbeSetXRef.PVALUE as TPVALUE, + ProbeSet.Chr_num as TCHR_NUM, ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL, + ProbeSet.name_num as TNAME_NUM FROM %s%s ProbeSetXRef, ProbeSet %s + WHERE %s and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = %d + """ % (j, incGeneRIFTbl, incGenoTbl, incGoTbl, item, database.id)) + elif self.dbType == "Geno": + newclause.append("SELECT %d, Geno.Name, GenoFreeze.createtime as thistable, Geno.Name as Geno_Name, Geno.Source2 as Geno_Source2, Geno.chr_num as Geno_chr_num, Geno.Mb as Geno_Mb FROM GenoXRef, GenoFreeze, Geno WHERE %s and Geno.Id = GenoXRef.GenoId and GenoXRef.GenoFreezeId = GenoFreeze.Id and GenoFreeze.Id = %d"% (j, item, database.id)) + else: + pass + + searchQuery = map(lambda X:'(%s)' % X, 
newclause) + searchQuery = string.join(searchQuery, ' UNION ') + self.query.append(searchQuery) + return 1 + else: + heading = "Search Result" + detail = ["No keyword was entered for this search, please go back and enter your keyword."] + self.error(heading=heading,detail=detail,error="No Keyword") + return 0 + + + + def normalSearch(self): + self.ANDkeyword2 = re.sub(self._1mPattern, '', self.ANDkeyword) + self.ANDkeyword2 = re.sub(self._2mPattern, '', self.ANDkeyword2) + self.ANDkeyword2 = re.sub(self._3mPattern, '', self.ANDkeyword2) + self.ANDkeyword2 = re.sub(self._5mPattern, '', self.ANDkeyword2) + ##remove remain parethesis, could be input with syntax error + self.ANDkeyword2 = re.sub(re.compile('\s*\([\s\S]*\)'), '', self.ANDkeyword2) + self.ANDkeyword2 = self.encregexp(self.ANDkeyword2) + + self.ORkeyword2 = re.sub(self._1mPattern, '', self.ORkeyword) + self.ORkeyword2 = re.sub(self._2mPattern, '', self.ORkeyword2) + self.ORkeyword2 = re.sub(self._3mPattern, '', self.ORkeyword2) + self.ORkeyword2 = re.sub(self._5mPattern, '', self.ORkeyword2) + ##remove remain parethesis, could be input with syntax error + self.ORkeyword2 = re.sub(re.compile('\s*\([\s\S]*\)'), '', self.ORkeyword2) + self.ORkeyword2 = self.encregexp(self.ORkeyword2) + + if self.ORkeyword2 or self.ANDkeyword2: + ANDFulltext = [] + ORFulltext = [] + for k, item in enumerate(self.ORkeyword2 + self.ANDkeyword2): + self.nkeywords += 1 + if k >=len(self.ORkeyword2): + query = self.ANDQuery + DescriptionText = self.ANDDescriptionText + clausejoin = ' OR ' + fulltext = ANDFulltext + else: + query = self.ORQuery + DescriptionText = self.ORDescriptionText + clausejoin = ' OR ' + fulltext = ORFulltext + + if self.dbType == "ProbeSet" and item.find('.') < 0 and item.find('\'') < 0: + fulltext.append(item) + else: + if self.matchwhole and item.find("'") < 0: + item = "[[:<:]]"+ item+"[[:>:]]" + clause2 = [] + for field in self.searchField: + if self.dbType == "Publish": + clause2.append("%s REGEXP 
\"%s\"" % (field,item)) + else: + clause2.append("%s REGEXP \"%s\"" % ("%s.%s" % (self.dbType,field),item)) + clauseItem = "(%s)" % string.join(clause2, clausejoin) + query.append(" (%s) " % clauseItem) + if ANDFulltext: + clauseItem = " MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('+%s' IN BOOLEAN MODE) " % string.join(ANDFulltext, " +") + self.ANDQuery.append(" (%s) " % clauseItem) + if ORFulltext: + clauseItem = " MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) " % string.join(ORFulltext, " ") + self.ORQuery.append(" (%s) " % clauseItem) + else: + pass + return 1 + + + + def encregexp(self,str): + if not str: + return [] + else: + wildcardkeyword = str.strip() + wildcardkeyword = string.replace(wildcardkeyword,',',' ') + wildcardkeyword = string.replace(wildcardkeyword,';',' ') + wildcardkeyword = wildcardkeyword.split() + NNN = len(wildcardkeyword) + for i in range(NNN): + keyword = wildcardkeyword[i] + keyword = string.replace(keyword,"*",".*") + keyword = string.replace(keyword,"?",".") + wildcardkeyword[i] = keyword#'[[:<:]]'+ keyword+'[[:>:]]' + return wildcardkeyword + + + + def patternSearch(self): + # Lei Yan + ##Process Inputs + m1_AND = self._1mPattern.findall(self.ANDkeyword) + m2_AND = self._2mPattern.findall(self.ANDkeyword) + m3_AND = self._3mPattern.findall(self.ANDkeyword) + m5_AND = self._5mPattern.findall(self.ANDkeyword) + m1_OR = self._1mPattern.findall(self.ORkeyword) + m2_OR = self._2mPattern.findall(self.ORkeyword) + m3_OR = self._3mPattern.findall(self.ORkeyword) + m5_OR = self._5mPattern.findall(self.ORkeyword) + + #pattern search + if m1_AND or m1_OR or m2_AND or m2_OR or m3_AND or m3_OR or m5_AND or m5_OR: + + self.orderByDefalut = 'PROBESETID' + + _1Cmds = map(string.upper, map(lambda x:x[0], m1_AND + m1_OR)) + _2Cmds = map(string.upper, map(lambda x:x[0], m2_AND + 
m2_OR)) + _3Cmds = map(string.upper, map(lambda x:x[0], m3_AND + m3_OR)) + _5Cmds = map(string.upper, map(lambda x:x[0], m5_AND + m5_OR)) + + self.nkeywords += len(_1Cmds) + len(_2Cmds) + len(_3Cmds) + + if self.dbType == "Publish" and \ + ( (_2Cmds and reduce(lambda x, y: (y not in ["LRS"]) or x, _2Cmds, False))\ + or (_5Cmds and reduce(lambda x, y: (y not in ["LRS"]) or x, _5Cmds, False)) ): + heading = "Search Result" + detail = ["Pattern search is not available for phenotype databases at this time."] + self.error(heading=heading,detail=detail,error="Error") + return 0 + elif self.dbType == "ProbeSet" and \ + ((_2Cmds and reduce(lambda x, y: (y not in ["MEAN", "LRS", "PVALUE", "TRANSLRS", "CISLRS", "RANGE", "H2"]) or x, _2Cmds, False))\ + or (_3Cmds and reduce(lambda x, y: (y not in ["POS", "POSITION", "MB"]) or x, _3Cmds, False))\ + or (_5Cmds and reduce(lambda x, y: (y not in ["LRS"]) or x, _5Cmds, False))\ + or (_1Cmds and reduce(lambda x, y: (y not in ["FLAG", "STRAND_PROBE", "STRAND_GENE", "GO", "WIKI", "RIF", "GENEID"]) or x, _1Cmds, False))): + heading = "Search Result" + detail = ["You entered at least one incorrect search command."] + self.error(heading=heading,detail=detail,error="Error") + return 0 + elif self.dbType == "Geno" and (_1Cmds or _2Cmds or _5Cmds or (_3Cmds and reduce(lambda x, y: (y not in ["POS", "POSITION", "MB"]) or x, _3Cmds, False)) ): + heading = "Search Result" + detail = ["You entered at least one incorrect search command."] + self.error(heading=heading,detail=detail,error="Error") + return 0 + else: + for k, item in enumerate(m1_OR+m1_AND): + if k >=len(m1_OR): + query = self.ANDQuery + DescriptionText = self.ANDDescriptionText + else: + query = self.ORQuery + DescriptionText = self.ORDescriptionText + + if item[1] == '-': + strandName = 'minus' + elif item[1] == '+': + strandName = 'plus' + else: + strandName = item[1] + + if item[0].upper() in ("FLAG"): + clauseItem = " %s.%s = %s " % (self.dbType, item[0], item[1]) + 
DescriptionText.append(HT.Span(' with ', HT.U('FLAG'), ' equal to ', item[1])) + elif item[0].upper() in ("WIKI"): + clauseItem = " %s.symbol = GeneRIF.symbol and GeneRIF.versionId=0 and GeneRIF.display>0 and (GeneRIF.comment REGEXP \"%s\" or GeneRIF.initial = \"%s\") " % (self.dbType, "[[:<:]]"+ item[1]+"[[:>:]]", item[1]) + DescriptionText.append(HT.Span(' with GeneWiki contains ', HT.U(item[1]))) + elif item[0].upper() in ("RIF"): + clauseItem = " %s.symbol = GeneRIF_BASIC.symbol and MATCH (GeneRIF_BASIC.comment) AGAINST ('+%s' IN BOOLEAN MODE) " % (self.dbType, item[1]) + DescriptionText.append(HT.Span(' with GeneRIF contains ', HT.U(item[1]))) + elif item[0].upper() in ("GENEID"): + clauseItem = " %s.GeneId in ( %s ) " % (self.dbType, string.replace(item[1], '-', ', ')) + DescriptionText.append(HT.Span(' with Entrez Gene ID in ', HT.U(string.replace(item[1], '-', ', ')))) + elif item[0].upper() in ("GO"): + Field = 'GOterm.acc' + Id = 'GO:'+('0000000'+item[1])[-7:] + Statements = '%s.symbol=GOgene_product.symbol and GOassociation.gene_product_id=GOgene_product.id and GOterm.id=GOassociation.term_id' % (self.dbType); + clauseItem = " %s = '%s' and %s " % (Field, Id, Statements) + #self.incGoTbl = " ,db_GeneOntology.term as GOterm, db_GeneOntology.association as GOassociation, db_GeneOntology.gene_product as GOgene_product " + DescriptionText.append(HT.Span(' with ', HT.U('GO'), ' ID equal to ', Id)) + else: + clauseItem = " %s.%s = '%s' " % (self.dbType, item[0], item[1]) + if item[0].upper() in ["STRAND_PROBE"]: + DescriptionText.append(' with probe on the %s strand' % strandName) + elif item[0].upper() in ["STRAND_GENE"]: + DescriptionText.append(' with gene on the %s strand' % strandName) + else: + pass + query.append(" (%s) " % clauseItem) + + for k, item in enumerate(m2_OR+m2_AND): + if k >=len(m2_OR): + query = self.ANDQuery + DescriptionText = self.ANDDescriptionText + else: + query = self.ORQuery + DescriptionText = self.ORDescriptionText + + itemCmd = 
item[0] + lowerLimit = float(item[1]) + upperLimit = float(item[2]) + + if itemCmd.upper() in ("TRANSLRS", "CISLRS"): + if item[3]: + mthresh = float(item[3]) + clauseItem = " %sXRef.LRS > %2.7f and %sXRef.LRS < %2.7f " % \ + (self.dbType, min(lowerLimit, upperLimit), self.dbType, max(lowerLimit, upperLimit)) + if itemCmd.upper() == "CISLRS": + clauseItem += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and %s.Chr = Geno.Chr and ABS(%s.Mb-Geno.Mb) < %2.7f """ % (self.dbType, self.speciesId, self.dbType, self.dbType, mthresh) + DescriptionText.append(HT.Span(' with a ', HT.U('cis-QTL'), ' having an LRS between %g and %g using a %g Mb exclusion buffer' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit), mthresh))) + else: + clauseItem += """ and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and (%s.Chr != Geno.Chr or (%s.Chr != Geno.Chr and ABS(%s.Mb-Geno.Mb) > %2.7f)) """ % (self.dbType, self.speciesId, self.dbType, self.dbType, self.dbType, mthresh) + DescriptionText.append(HT.Span(' with a ', HT.U('trans-QTL'), ' having an LRS between %g and %g using a %g Mb exclusion buffer' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit), mthresh))) + query.append(" (%s) " % clauseItem) + self.orderByDefalut = "LRS" + else: + pass + elif itemCmd.upper() in ("RANGE"): + #XZ, 03/05/2009: Xiaodong changed Data to ProbeSetData + clauseItem = " (select Pow(2, max(value) -min(value)) from ProbeSetData where Id = ProbeSetXRef.dataId) > %2.7f and (select Pow(2, max(value) -min(value)) from ProbeSetData where Id = ProbeSetXRef.dataId) < %2.7f " % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)) + query.append(" (%s) " % clauseItem) + DescriptionText.append(HT.Span(' with a range of expression that varied between %g and %g' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)), " (fold difference)")) + else: + clauseItem = " %sXRef.%s > %2.7f and %sXRef.%s < %2.7f " % \ + (self.dbType, itemCmd, min(lowerLimit, upperLimit), self.dbType, 
itemCmd, max(lowerLimit, upperLimit)) + query.append(" (%s) " % clauseItem) + self.orderByDefalut = itemCmd + DescriptionText.append(HT.Span(' with ', HT.U(itemCmd), ' between %g and %g' % (min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)))) + + for k, item in enumerate(m3_OR+m3_AND): + if k >=len(m3_OR): + query = self.ANDQuery + DescriptionText = self.ANDDescriptionText + else: + query = self.ORQuery + DescriptionText = self.ORDescriptionText + itemCmd = item[0] + chrsch = item[1] + lowerLimit = float(item[2]) + upperLimit = float(item[3]) + fname = 'target genes' + if self.dbType == "ProbeSet": + clauseItem = " %s.Chr = '%s' and %s.Mb > %2.7f and %s.Mb < %2.7f " % \ + (self.dbType, chrsch, self.dbType, min(lowerLimit, upperLimit), self.dbType, max(lowerLimit, upperLimit)) + elif self.dbType == "Geno": + fname = 'loci' + clauseItem = " %s.Chr = '%s' and %s.Mb > %2.7f and %s.Mb < %2.7f " % \ + (self.dbType, chrsch, self.dbType, min(lowerLimit, upperLimit), self.dbType, max(lowerLimit, upperLimit)) + else: + continue + query.append(" (%s) " % clauseItem) + self.orderByDefalut = itemCmd + DescriptionText.append(HT.Span(' with ', HT.U('target genes'), ' on chromosome %s between %g and %g Mb' % \ + (chrsch, min(lowerLimit, upperLimit), max(lowerLimit, upperLimit)))) + + for k, item in enumerate(m5_OR+m5_AND): + if k >=len(m5_OR): + query = self.ANDQuery + DescriptionText = self.ANDDescriptionText + else: + query = self.ORQuery + DescriptionText = self.ORDescriptionText + itemCmd = item[0] + lowerLimit = float(item[1]) + upperLimit = float(item[2]) + chrsch = item[3] + MblowerLimit = float(item[4]) + MbupperLimit = float(item[5]) + if self.dbType == "ProbeSet" or self.dbType == "Publish": + clauseItem = " %sXRef.LRS > %2.7f and %sXRef.LRS < %2.7f " % \ + (self.dbType, min(lowerLimit, upperLimit), self.dbType, max(lowerLimit, upperLimit)) + clauseItem += " and %sXRef.Locus = Geno.name and Geno.SpeciesId = %s and Geno.Chr = '%s' and Geno.Mb > %2.7f and Geno.Mb < 
def generateWarningLayer(self):
    """Return the markup string for the floating warning layer.

    NOTE(review): in this view of the file the literal contains only
    whitespace; it presumably carried HTML that did not survive extraction.
    Confirm against the repository copy before relying on it.
    """
    layerString = """




    """

    return layerString


def getLocationSortValue(self, chrName, mb):
    """Return the numeric key used to sort table cells by genomic location.

    Numeric chromosome names give ``chr * 1000 + Mb``.  ``X`` is ranked as
    chromosome 20.  Any other name is ranked by the character code of its
    first upper-cased letter.

    chrName -- chromosome name (number or string)
    mb      -- position in megabases (anything ``float()`` accepts)
    """
    mb = float(mb)
    try:
        return int(chrName)*1000 + mb
    except (TypeError, ValueError):
        chrLabel = str(chrName).upper()
        if chrLabel == 'X':
            return 20*1000 + mb
        return ord(chrLabel[0])*1000 + mb


def getLocusLocation(self, species, locus):
    """Look up the chromosome and Mb position of a marker.

    Returns a ``(Chr, Mb)`` tuple, or None when the marker is not found for
    the species or either column is empty.  Values are passed as query
    parameters so the driver does the quoting.
    """
    self.cursor.execute("""
        select Geno.Chr, Geno.Mb from Geno, Species
        where Species.Name = %s and
        Geno.Name = %s and
        Geno.SpeciesId = Species.Id
        """, (species, locus))
    result = self.cursor.fetchone()

    if result and result[0] and result[1]:
        return result[0], result[1]
    return None


def getTableHeaderForGeno(self, worksheet=None, newrow=None, headingStyle=None):
    """Build the header row of the genotype result table.

    When a worksheet is supplied, the matching spreadsheet headings are
    written to row ``newrow`` with ``headingStyle``.
    Returns a list holding one list of THCell objects.
    """
    className = "fs13 fwb ffl b1 cw cbrb"

    tblobj_header = [[THCell(HT.TD(' ', Class=className), sort=0),
        THCell(HT.TD('Record', HT.BR(), 'ID', HT.BR(), Class=className), text='record_id', idx=1),
        THCell(HT.TD('Location', HT.BR(), 'Chr and Mb', HT.BR(), Class=className), text='location', idx=2)]]

    # worksheet defaults to None, so only export when one was really passed in
    if worksheet is not None:
        for ncol, item in enumerate(['Record ID', 'Location (Chr, Mb)']):
            worksheet.write([newrow, ncol], item, headingStyle)
            worksheet.set_column([ncol, ncol], 2*len(item))

    return tblobj_header


def getTableBodyForGeno(self, traitList, formName=None, worksheet=None, newrow=None):
    """Build the body rows of the genotype result table.

    Each trait yields a checkbox cell, a record-ID link and a location cell.
    When a worksheet is supplied, one spreadsheet row per trait is written
    starting at ``newrow``.
    Returns a list of rows, each a list of TDCell objects.
    """
    tblobj_body = []

    className = "fs12 fwn ffl b1 c222"

    for thisTrait in traitList:
        tr = []

        if not thisTrait.haveinfo:
            thisTrait.retrieveInfo()

        trId = str(thisTrait)

        tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class=className), text=trId))

        tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showDatabase3('%s','%s','%s','')" % (formName, thisTrait.db.name, thisTrait.name), Class="fs12 fwn ffl"),align="left", Class=className), text=thisTrait.name, val=thisTrait.name.upper()))

        #XZ: trait_location_value is used for sorting
        trait_location_repr = 'N/A'
        trait_location_value = 1000000

        if thisTrait.chr and thisTrait.mb:
            trait_location_value = self.getLocationSortValue(thisTrait.chr, thisTrait.mb)
            trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) )

        tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value))

        tblobj_body.append(tr)

        if worksheet is not None:
            for ncol, item in enumerate([thisTrait.name, trait_location_repr]):
                worksheet.write([newrow, ncol], item)
            newrow += 1

    return tblobj_body


def getTableHeaderForPublish(self, worksheet=None, newrow=None, headingStyle=None):
    """Build the header row of the published-phenotype result table.

    When a worksheet is supplied, the matching spreadsheet headings are
    written to row ``newrow`` with ``headingStyle``.
    Returns a list holding one list of THCell objects.
    """
    className = "fs13 fwb ffl b1 cw cbrb"

    # The record column key was spelled "recond_id"; getSortByValue() and the
    # other headers use "record_id".
    # NOTE(review): confirm nothing else depends on the old spelling.
    tblobj_header = [[THCell(HT.TD(' ', Class=className, nowrap="on"), sort=0),
        THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class=className, nowrap="on"), text="record_id", idx=1),
        THCell(HT.TD('Phenotype',HT.BR(),HT.BR(), Class=className, nowrap="on"), text="pheno", idx=2),
        THCell(HT.TD('Authors',HT.BR(),HT.BR(), Class=className, nowrap="on"), text="auth", idx=3),
        THCell(HT.TD('Year',HT.BR(),HT.BR(), Class=className, nowrap="on"), text="year", idx=4),
        THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="lrs", idx=5),
        THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="lrs_location", idx=6)]]

    if worksheet is not None:
        for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)"]):
            worksheet.write([newrow, ncol], item, headingStyle)
            worksheet.set_column([ncol, ncol], 2*len(item))

    return tblobj_header


def getTableBodyForPublish(self, traitList, formName=None, worksheet=None, newrow=None, species=''):
    """Build the body rows of the published-phenotype result table.

    Each trait yields checkbox, record ID, phenotype description, authors,
    year (linked to PubMed when an ID exists), max LRS and max-LRS location
    cells.  Confidential traits show the pre-publication description unless
    the current user has access.  When a worksheet is supplied, one
    spreadsheet row per trait is written starting at ``newrow``.
    Returns a list of rows, each a list of TDCell objects.
    """
    tblobj_body = []

    className = "fs12 fwn b1 c222"

    for thisTrait in traitList:
        tr = []

        if not thisTrait.haveinfo:
            thisTrait.retrieveInfo(QTL=1)

        trId = str(thisTrait)

        tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class=className), text=trId))

        tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showDatabase3('%s','%s','%s','')" % (formName, thisTrait.db.name, thisTrait.name), Class="fs12 fwn"), nowrap="yes",align="center", Class=className),str(thisTrait.name), thisTrait.name))

        PhenotypeString = thisTrait.post_publication_description
        if thisTrait.confidential:
            if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=thisTrait.authorized_users):
                PhenotypeString = thisTrait.pre_publication_description
        tr.append(TDCell(HT.TD(PhenotypeString, Class=className), PhenotypeString, PhenotypeString.upper()))

        tr.append(TDCell(HT.TD(thisTrait.authors, Class="fs12 fwn b1 c222 fsI"),thisTrait.authors, thisTrait.authors.strip().upper()))

        # year shown in the cell, and the numeric value used for sorting
        try:
            myear = yearRepr = int(thisTrait.year)
        except (TypeError, ValueError):
            yearRepr = "N/A"
            myear = 0

        if thisTrait.pubmed_id:
            PubMedLink = HT.Href(text= yearRepr,url= webqtlConfig.PUBMEDLINK_URL % thisTrait.pubmed_id,target='_blank', Class="fs12 fwn")
        else:
            PubMedLink = yearRepr

        tr.append(TDCell(HT.TD(PubMedLink, Class=className, align='center'), yearRepr, myear))

        #LRS and its location
        LRS_score_repr = 'N/A'
        LRS_location_repr = 'N/A'

        if thisTrait.lrs:
            LRS_score_repr = '%3.1f' % thisTrait.lrs
            LRS_score_value = thisTrait.lrs
            tr.append(TDCell(HT.TD(LRS_score_repr, Class=className), LRS_score_repr, LRS_score_value))

            #XZ: LRS_location_value is used for sorting
            LRS_location_value = 1000000
            location = self.getLocusLocation(species, thisTrait.locus)
            if location:
                LRS_Chr, LRS_Mb = location
                LRS_location_value = self.getLocationSortValue(LRS_Chr, LRS_Mb)
                LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) )

            # always emit the location cell so every row has the same width
            tr.append(TDCell(HT.TD(LRS_location_repr, Class=className, nowrap="on"), LRS_location_repr, LRS_location_value))

        else:
            tr.append(TDCell(HT.TD("N/A", Class=className), "N/A", "N/A"))
            tr.append(TDCell(HT.TD("N/A", Class=className), "N/A", "N/A"))

        tblobj_body.append(tr)

        if worksheet is not None:
            for ncol, item in enumerate([thisTrait.name, PhenotypeString, thisTrait.authors, thisTrait.year, thisTrait.pubmed_id, LRS_score_repr, LRS_location_repr]):
                worksheet.write([newrow, ncol], item)
            newrow += 1

    return tblobj_body


def getTableHeaderForProbeSet(self, worksheet=None, newrow=None, headingStyle=None):
    """Build the header row of the probe-set (expression) result table.

    When a worksheet is supplied, the matching spreadsheet headings are
    written to row ``newrow`` with ``headingStyle``.
    Returns a list holding one list of THCell objects.
    """
    className = "fs13 fwb ffl b1 cw cbrb"

    tblobj_header = [[THCell(HT.TD(' ', Class=className,nowrap='ON'), sort=0),
        THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class=className), text="record_id", idx=1),
        THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class=className), text="symbol", idx=2),
        THCell(HT.TD('Description',HT.BR(),HT.BR(), Class=className), text="desc", idx=3),
        THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class=className), text="location", idx=4),
        THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class=className), text="mean", idx=5),
        THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class=className, nowrap='ON'), text="lrs", idx=6),
        THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class=className, nowrap='ON'), text="lrs_location", idx=7)]]

    if worksheet is not None:
        for ncol, item in enumerate(['Record', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr, Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)']):
            worksheet.write([newrow, ncol], item, headingStyle)
            worksheet.set_column([ncol, ncol], 2*len(item))

    return tblobj_header


def getTableBodyForProbeSet(self, traitList=None, primaryTrait=None, formName=None, worksheet=None, newrow=None, species=''):
    """Build the body rows of the probe-set (expression) result table.

    Each trait yields checkbox, record ID, gene symbol (linked to NCBI),
    description, location, mean expression, max LRS and max-LRS location
    cells.  When a worksheet is supplied, one spreadsheet row per trait is
    written starting at ``newrow``.  ``primaryTrait`` is accepted but not
    used here.
    Returns a list of rows, each a list of TDCell objects.
    """
    if traitList is None:
        traitList = []

    tblobj_body = []

    className = "fs12 fwn b1 c222"

    for thisTrait in traitList:

        if not thisTrait.haveinfo:
            thisTrait.retrieveInfo(QTL=1)

        if not thisTrait.symbol:
            thisTrait.symbol = "N/A"

        tr = []

        trId = str(thisTrait)

        #XZ, 12/08/2008: checkbox
        tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId))

        #XZ, 12/08/2008: probeset name
        if thisTrait.cellid:
            tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name, url="javascript:showDatabase3('%s','%s','%s','%s')" % (formName, thisTrait.db.name,thisTrait.name,thisTrait.cellid), Class="fs12 fwn"), Class=className), thisTrait.name, thisTrait.name.upper()))
        else:
            tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name, url="javascript:showDatabase3('%s','%s','%s','')" % (formName, thisTrait.db.name,thisTrait.name), Class="fs12 fwn"), Class=className), thisTrait.name, thisTrait.name.upper()))

        if thisTrait.geneid:
            symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % thisTrait.geneid, Class="font_black fs12 fwn")
        else:
            symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&DB=gene&term=%s" % thisTrait.symbol, Class="font_black fs12 fwn")

        #XZ, 12/08/2008: gene symbol
        tr.append(TDCell(HT.TD(symbolurl, Class="fs12 fwn b1 c222 fsI"),thisTrait.symbol, thisTrait.symbol.upper()))

        #XZ, 12/08/2008: description
        #XZ, 06/05/2009: Rob asked to add probe target description
        description_string = str(thisTrait.description).strip()
        target_string = str(thisTrait.probe_target_description).strip()

        # fall back to the symbol when there is no usable description
        if len(description_string) > 1 and description_string != 'None':
            description_display = description_string
        else:
            description_display = thisTrait.symbol

        if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None':
            description_display = description_display + '; ' + target_string.strip()

        tr.append(TDCell(HT.TD(description_display, Class=className), description_display, description_display))

        #XZ: trait_location_value is used for sorting
        trait_location_repr = 'N/A'
        trait_location_value = 1000000

        if thisTrait.chr and thisTrait.mb:
            trait_location_value = self.getLocationSortValue(thisTrait.chr, thisTrait.mb)
            trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) )

        tr.append(TDCell(HT.TD(trait_location_repr, Class=className, nowrap="on"), trait_location_repr, trait_location_value))

        #XZ, 01/12/08: This SQL query is much faster.
        self.cursor.execute("""
            select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
            where ProbeSetXRef.ProbeSetFreezeId = %s and
            ProbeSet.Id = ProbeSetXRef.ProbeSetId and
            ProbeSet.Name = %s
            """, (thisTrait.db.id, thisTrait.name))
        result = self.cursor.fetchone()

        mean = 0
        if result and result[0]:
            mean = result[0]

        #XZ, 06/05/2009: It is necessary to turn on nowrap
        meanRepr = "%2.3f" % mean
        tr.append(TDCell(HT.TD(meanRepr, Class=className, align='right', nowrap='ON'),meanRepr, mean))

        #LRS and its location
        LRS_score_repr = 'N/A'
        LRS_score_value = 0
        LRS_location_repr = 'N/A'
        LRS_location_value = 1000000

        #Max LRS and its Locus location
        if thisTrait.lrs and thisTrait.locus:
            location = self.getLocusLocation(species, thisTrait.locus)

            if location:
                LRS_Chr, LRS_Mb = location

                #XZ: LRS_location_value is used for sorting
                LRS_location_value = self.getLocationSortValue(LRS_Chr, LRS_Mb)
                LRS_score_repr = '%3.1f' % thisTrait.lrs
                LRS_score_value = thisTrait.lrs
                LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) )

                tr.append(TDCell(HT.TD(LRS_score_repr, Class=className, align='right', nowrap="on"), LRS_score_repr, LRS_score_value))
                tr.append(TDCell(HT.TD(LRS_location_repr, Class=className, nowrap="on"), LRS_location_repr, LRS_location_value))
            else:
                # locus not found: keep the row width with the default values
                tr.append(TDCell(HT.TD(LRS_score_repr, Class=className), LRS_score_repr, LRS_score_value))
                tr.append(TDCell(HT.TD(LRS_location_repr, Class=className), LRS_location_repr, LRS_location_value))

        else:
            tr.append(TDCell(HT.TD("N/A", Class=className), "N/A", "N/A"))
            tr.append(TDCell(HT.TD("N/A", Class=className), "N/A", "N/A"))

        tblobj_body.append(tr)

        if worksheet is not None:
            for ncol, item in enumerate([thisTrait.name, thisTrait.geneid, thisTrait.homologeneid, thisTrait.symbol, description_display, trait_location_repr, mean, LRS_score_repr, LRS_location_repr]):
                worksheet.write([newrow, ncol], item)
            newrow += 1

    return tblobj_body


def createExcelFileWithTitleAndFooter(self, workbook=None, identification=None, db=None, returnNumber=None):
    """Add a worksheet to ``workbook`` and write its title and footer lines.

    The title block (citation, trait, database, date, time, data-ownership
    notice) goes in rows 1-6 of column 0.  The footer goes in rows
    ``9 + returnNumber`` and ``10 + returnNumber``.
    Returns the new worksheet.
    """
    worksheet = workbook.add_worksheet()

    titleStyle = workbook.add_format(align = 'left', bold = 0, size=14, border = 1, border_color="gray")

    # take one timestamp so the date and time lines cannot disagree
    now = time.gmtime()

    ##Write title Info
    # Modified by Hongqiang Li
    worksheet.write([1, 0], "Citations: Please see %s/reference.html" % webqtlConfig.PORTADDR, titleStyle)
    worksheet.write([2, 0], "Trait : %s" % identification, titleStyle)
    worksheet.write([3, 0], "Database : %s" % db.fullname, titleStyle)
    worksheet.write([4, 0], "Date : %s" % time.strftime("%B %d, %Y", now), titleStyle)
    worksheet.write([5, 0], "Time : %s GMT" % time.strftime("%H:%M ", now), titleStyle)
    worksheet.write([6, 0], "Status of data ownership: Possibly unpublished data; please see %s/statusandContact.html for details on sources, ownership, and usage of these data." % webqtlConfig.PORTADDR, titleStyle)
    #Write footer info
    worksheet.write([9 + returnNumber, 0], "Funding for The GeneNetwork: NIAAA (U01AA13499, U24AA13513), NIDA, NIMH, and NIAAA (P20-DA21131), NCI MMHCC (U01CA105417), and NCRR (U01NR 105417)", titleStyle)
    worksheet.write([10 + returnNumber, 0], "PLEASE RETAIN DATA SOURCE INFORMATION WHENEVER POSSIBLE", titleStyle)

    return worksheet


def getSortByValue(self, datasetType=''):
    """Return the default ``(column key, direction)`` sort for a dataset type.

    'Geno' sorts by location and 'ProbeSet' by symbol, both "up"; anything
    else (phenotype) sorts by record ID, "down".
    """
    if datasetType == 'Geno':
        return ("location", "up")
    if datasetType == 'ProbeSet':
        return ("symbol", "up")
    #Phenotype
    return ("record_id", "down")
def __init__(self, fd):
    """Run the cross-species text search and build the result page.

    For each species (the requested one first, then the rest of mouse, rat,
    human) this calls createTableBodies() and builds a tissue-count table
    and a trait table inside a tab.  Unless ``fd.returnFmt`` is 'text' the
    assembled HTML is stored in ``self.dict['body']``; otherwise a
    tab-separated export of the selected species' traits is stored in
    ``self.output``.

    fd -- form-data object; read here for ``formdata``, ``returnFmt``,
          ``RISet``, ``geneName``, ``refseq``, ``genbankid`` and ``geneid``.
    """
    templatePage.__init__(self, fd)

    if not self.openMysql():
        return

    # updated by NL, deleted jquery here, move it to dhtml.js
    self.dict['js1'] = ''

    #List of species (mouse, rat, human), with the selected species listed first
    #XZ, Oct 28, 2009: I changed the default species to mouse.
    input_species = fd.formdata.getfirst('species', "mouse").lower().strip()
    species_list = [input_species]
    for species in ["mouse","rat","human"]:
        if species not in species_list:
            species_list.append(species)

    ADMIN_tissue_alias = admin.ADMIN_tissue_alias

    tissue = fd.formdata.getfirst('tissue', "").lower().strip()
    if tissue:
        # map the tissue name or any of its aliases back to the canonical key
        try:
            rev_ADMIN_tissue_alias = {}
            for key in ADMIN_tissue_alias.keys():
                rev_ADMIN_tissue_alias[key] = key
                for alias in ADMIN_tissue_alias[key]:
                    rev_ADMIN_tissue_alias[alias.upper()] = key
            tissue = rev_ADMIN_tissue_alias[tissue.upper()]
        except Exception:
            tissue = "UNKNOWN"

    #possibly text output
    txtOutput = [] #ZS: if format=text
    all_species_dataset_count = 0 #XZ: count of datasets across all species; used in the opening text of the page
    all_species_trait_count = 0 #XZ: count of records across all species; used in opening text of the page and text file (if format = text)

    #div containing the tabs (species_container), the tabs themselves (species_tab_list, which is inserted into species_tabs), and the table (species_table) containing both the tissue and results tables for each tab
    species_container = HT.Div(id="species_tabs", Class="tab_container") #Div that will contain tabs for mouse/rat/human species; each tab contains a table with the result count for each tissue group
    species_tab_list = [HT.Href(text="%s" % species_list[0].capitalize(), url="#species1"), HT.Href(text="%s" % species_list[1].capitalize(), url="#species2"), HT.Href(text="%s" % species_list[2].capitalize(), url="#species3")]
    species_tabs = HT.List(species_tab_list, Class="tabs")
    species_table = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%",border=0, align="Left")

    for i, species in enumerate(species_list):
        #ZS: Table containing both the tissue record count and trait record tables as cells; this fixes a display issue in some browsers that places the tables side by side instead of top/bottom
        species_container_table = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%",border=0, align="Left")

        ADMIN_search_dbs = admin.ADMIN_search_dbs[species]

        div = HT.Div(id="species%s" % (i+1), Class="tab_content")
        tab_container = HT.Span() #ZS: Contains species_container_table within the species' tab

        tissuePageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%",border=0, align="Left")
        tissue_tblobj = {} # object used to create the table listing the results for each tissue
        tissue_tblobj['header'] = self.getTissueTableHeader() # creates header for table listing results for selected tissue

        traitPageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%",border=0, align="Left")
        trait_tblobj = {} # object used to create the table listing the trait results for each tissue
        trait_tblobj['header'] = self.getTraitTableHeader() # creates header for table listing trait results for selected tissue

        # this_species_dataset_count: datasets searched for this species
        # this_species_trait_count: trait records found for this species
        tissue_tblobj['body'], trait_tblobj['body'], this_species_dataset_count, this_species_trait_count, this_species_txtOutput = self.createTableBodies(fd, species, tissue, ADMIN_search_dbs)

        if species == input_species:
            txtOutput = this_species_txtOutput

        filename1 = webqtlUtil.genRandStr("Search_") #filename for tissue table object
        tissue_objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename1), 'wb')
        try:
            cPickle.dump(tissue_tblobj, tissue_objfile)
        finally:
            # close the handle even if pickling fails
            tissue_objfile.close()

        tissue_sortby = self.getTissueSortByValue() # sets how the tissue table should be sorted by default
        tissue_div = HT.Div(webqtlUtil.genTableObj(tblobj=tissue_tblobj, file=filename1, sortby=tissue_sortby, tableID = "tissue_sort%s" % (i+1), addIndex = "1"), Id="tissue_sort%s" % (i+1))

        tissuePageTable.append(HT.TR(HT.TD(" ")))
        tissuePageTable.append(HT.TR(HT.TD(tissue_div)))
        tissuePageTable.append(HT.TR(HT.TD(" ")))
        species_container_table.append(HT.TR(HT.TD(tissuePageTable)), HT.TR(HT.TD(" ")))

        filename2 = webqtlUtil.genRandStr("Search_") #filename for trait table object
        trait_objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename2), 'wb')
        try:
            cPickle.dump(trait_tblobj, trait_objfile)
        finally:
            trait_objfile.close()

        trait_sortby = self.getTraitSortByValue() # sets how the trait table should be sorted by default
        trait_div = HT.Div(webqtlUtil.genTableObj(tblobj=trait_tblobj, file=filename2, sortby=trait_sortby, tableID = "results_sort%s" % (i+1), addIndex = "0"), Id="results_sort%s" % (i+1))

        traitPageTable.append(HT.TR(HT.TD(" ")))
        traitPageTable.append(HT.TR(HT.TD(trait_div)))
        traitPageTable.append(HT.TR(HT.TD(" ")))
        species_container_table.append(HT.TR(HT.TD(traitPageTable)), HT.TR(HT.TD(" ")))

        if this_species_trait_count == 0:
            tab_container.append(HT.Div("No records retrieved for this species.", align="left", valign="top", style="font-size:42"))
        else:
            tab_container.append(species_container_table)

        all_species_dataset_count += this_species_dataset_count
        all_species_trait_count += this_species_trait_count

        div.append(tab_container)
        species_table.append(HT.TR(HT.TD(div)))

    species_container.append(species_table)

    if fd.returnFmt != 'text': #if the format is not 'text'
        self.dict['title'] = 'Search Results'
        TD_LR = HT.TD(height=100,width="100%",bgColor='#fafafa',valign="top")
        # Table containing all of the page's elements (opening text, form); in some browsers the elements arrange themselves horizontally if you don't put them into a table, so this fixes that problem
        pageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%",border=0, align="Left")

        # Table containing all of the form's elements (tabs, option buttons); used to correct the same issue mentioned in pageTable's comment
        formTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="100%",border=0)

        mainForm = HT.Form( cgi= os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name='showDatabase', submit=HT.Input(type='hidden'))
        hddn = {'FormID':'showDatabase','ProbeSetID':'_','database':'_','CellID':'_','RISet':fd.RISet}
        hddn['incparentsf1']='ON'
        for key in hddn.keys():
            mainForm.append(HT.Input(name=key, value=hddn[key], type='hidden'))

        #Add to collection, select all, invert selection, and deselect all ("reset") buttons
        addselect = HT.Href(url="#redirect", Class="add_traits")
        addselect_img = HT.Image("/images/add_collection1_final.jpg", name="addselect", alt="Add To Collection", title="Add To Collection", style="border:none;")
        addselect.append(addselect_img)
        selectall = HT.Href(url="#redirect", onClick="checkAll(document.getElementsByName('showDatabase')[0]);")
        selectall_img = HT.Image("/images/select_all2_final.jpg", name="selectall", alt="Select All", title="Select All", style="border:none;")
        selectall.append(selectall_img)
        # the closing parenthesis of the checkInvert(...) call was missing
        selectinvert = HT.Href(url="#redirect", onClick="checkInvert(document.getElementsByName('showDatabase')[0]);")
        selectinvert_img = HT.Image("/images/invert_selection2_final.jpg", name="selectinvert", alt="Invert Selection", title="Invert Selection", style="border:none;")
        selectinvert.append(selectinvert_img)
        reset = HT.Href(url="#redirect", onClick="checkNone(document.getElementsByName('showDatabase')[0]); return false;")
        reset_img = HT.Image("/images/select_none2_final.jpg", alt="Select None", title="Select None", style="border:none;")
        reset.append(reset_img)

        #Table with select, deselect, invert, etc. It is used for the results table.
        optionsTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="20%",border=0)
        optionsRow = HT.TR(HT.TD(selectall, width="25%"), HT.TD(reset, width="25%"), HT.TD(selectinvert, width="25%"), HT.TD(addselect, width="25%"))
        labelsRow = HT.TR(HT.TD(" "*2,"Select", width="25%"), HT.TD(" ","Deselect", width="25%"), HT.TD(" "*3,"Invert", width="25%"), HT.TD(" "*4,"Add", width="25%"))
        optionsTable.append(HT.TR(HT.TD(" ")), optionsRow, labelsRow)

        if fd.geneName:
            searchType = "gene name " + fd.geneName
        elif fd.refseq:
            searchType = "RefSeq accession number " + fd.refseq
        elif fd.genbankid:
            searchType = "Genbank ID " + fd.genbankid
        elif fd.geneid:
            searchType = "Gene ID " + fd.geneid
        else:
            searchType = ""

        SearchText = HT.Span("You searched for the %s in GeneNetwork." % searchType, HT.BR(),
            "We queried %s expression datasets across %s species and listed the results" % (all_species_dataset_count, len(species_list)), HT.BR(),
            "below. A total of %s records that may be of interest to you were found. The" % all_species_trait_count, HT.BR(),
            "top table lists the number of results found for each relevant tissue, and the", HT.BR(),
            "bottom gives a basic summary of each result. To study one of the results, click", HT.BR(),
            "its Record ID. More detailed information is also available for each result's group", HT.BR() ,
            "and dataset. To switch between species, click the tab with the corresponding", HT.BR(),
            "label.", HT.BR(), HT.BR(),
            "Please visit the links to the right to learn more about the variety of features", HT.BR(),
            "available within GeneNetwork.")

        LinkText = HT.Span()

        mainLink = HT.Href(url="/webqtl/main.py", text = "Main Search Page", target="_blank")
        homeLink = HT.Href(url="/home.html", text = "What is GeneNetwork?", target="_blank")
        tourLink = HT.Href(url="/tutorial/WebQTLTour/", text = "Tour of GeneNetwork (20-40 min)", target="_blank")
        faqLink = HT.Href(url="/faq.html", text = "Frequently Asked Questions", target="_blank")
        glossaryLink = HT.Href(url="/glossary.html", text = "Glossary of terms used throughout GeneNetwork", target="_blank")

        LinkText.append(mainLink, HT.BR(), homeLink, HT.BR(), tourLink, HT.BR(), faqLink, HT.BR(), glossaryLink)

        formTable.append(HT.TR(HT.TD(species_tabs, species_container)), HT.TR(HT.TD(optionsTable)))
        mainForm.append(formTable)

        if fd.geneName:
            SearchHeading = HT.Paragraph('Search Results for gene name ', fd.geneName)
        elif fd.refseq:
            SearchHeading = HT.Paragraph('Search Results for RefSeq accession number ', fd.refseq)
        elif fd.genbankid:
            SearchHeading = HT.Paragraph('Search Results for Genbank ID ', fd.genbankid)
        elif fd.geneid:
            SearchHeading = HT.Paragraph('Search Results for Gene ID ', fd.geneid)
        else:
            SearchHeading = HT.Paragraph('')

        SearchHeading.__setattr__("class","title")

        pageTable.append(HT.TR(HT.TD(SearchText, width=600), HT.TD(LinkText, align="left", valign="top")), HT.TR(HT.TD(" ", colspan=2)), HT.TR(HT.TD(mainForm, colspan=2)))
        TD_LR.append(SearchHeading, pageTable)
        self.dict['body'] = TD_LR
    else:
        if len(txtOutput) == 0:
            self.output = "##No records were found for this species. \n"
        else:
            self.output = "##A total of %d records were returned. \n" % all_species_trait_count
            newOutput = []
            strainLists = {} # strain list per cross, so each genotype file is read once
            for tissueGrp, thisTrait in txtOutput:
                RISet = thisTrait.riset
                if RISet in strainLists:
                    thisStrainlist = strainLists[RISet]
                else:
                    thisGenotype = reaper.Dataset()
                    thisGenotype.read(os.path.join(webqtlConfig.GENODIR, RISet + '.geno'))
                    if thisGenotype.type == "riset":
                        _f1, _f12, _mat, _pat = webqtlUtil.ParInfo[RISet]
                        thisGenotype = thisGenotype.add(Mat=_mat, Pat=_pat, F1=_f1)
                    thisStrainlist = list(thisGenotype.prgy)
                    strainLists[RISet] = thisStrainlist
                thisTrait.retrieveData(strainlist=thisStrainlist)
                thisData = []
                for strain in thisStrainlist:
                    if strain in thisTrait.data:
                        thisData.append(thisTrait.data[strain].val)
                    else:
                        thisData.append(None)
                newOutput.append(["Structure", "Database", "ProbeSetID", "Cross"] + thisStrainlist)
                newOutput.append([tissueGrp, '"%s"' % thisTrait.db.fullname, thisTrait.name, RISet] + [str(val) for val in thisData])
            newOutput = webqtlUtil.asymTranspose(newOutput)
            for row in newOutput:
                self.output += "\t".join(row) + "\n"
#Append cell with tissue name to row + + this_tissue_record_count = 0 #Count of the results for each tissue + for dbName in dbNames: + this_species_dataset_count += 1 + thisDB = webqtlDataset(dbName, self.cursor) + + if fd.geneName: + if fd.searchAlias: + self.cursor.execute("""SELECT ProbeSet.Name + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSetFreeze.Name = "%s" AND + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId AND + MATCH (ProbeSet.symbol, alias) AGAINST ("+%s" IN BOOLEAN MODE) AND + ProbeSet.Id = ProbeSetXRef.ProbeSetId""" % (dbName, fd.geneName)) + else: + self.cursor.execute("""SELECT ProbeSet.Name + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSetFreeze.Name = "%s" AND + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId AND + ProbeSet.symbol = "%s" AND + ProbeSet.Id = ProbeSetXRef.ProbeSetId""" % (dbName, fd.geneName)) + elif fd.refseq: + + # XZ, Oct/08/2009: Search for RefSeq ID is kind of tricky. One probeset can have multiple RefseqIDs that are delimited by ' /// ' (currently). + # So I have to use 'like' instead of '=' in SQL query. But user search with one short string, for example 'NM_1', it will return thousands of results. + # To prevent this, I set the restriction that the length of input Refseq ID must be at least 9 characters. Otherwise, do NOT start searching. + # Even with the restriction of input RefSeqID, I'm still worried about the 'like' in SQL query. My concern is in future, there might be RefSeqIDs with + # 10 characters whose first 9 characters are the same as the existing ones. So I decide to further check the result. We should also consider that the + # RefSeqID in database may have version number such as "NM_177938.2". If the input RefSeqID is 'NM_177938', it should be matched. I think we should get rid of the version number in database. + + if len(fd.refseq) < 9: + if fd.returnFmt != 'text': + heading = "Search Result" + detail = ["The RefSeq ID that you inputed is less than 9 characters. 
GeneNetwork thinks it is not a legitimate RefSeq ID and did not do the search. Please try to use a RefSeq ID with at least 9 characters."] + self.error(heading=heading,detail=detail,error="Not Found") + else: + self.output = "#The gene name or IDs you submitted did not match any record in the databases available. You may try different gene names or tissue type." + return + else: + sqlString = """SELECT ProbeSet.Id, ProbeSet.RefSeq_TranscriptId + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSetFreeze.Name = "%s" AND + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId AND + MATCH(ProbeSet.RefSeq_TranscriptId) AGAINST ("+%s" IN BOOLEAN MODE) AND + ProbeSet.Id = ProbeSetXRef.ProbeSetId""" % (dbName, fd.refseq) + + self.cursor.execute(sqlString) + + results = self.cursor.fetchall() + if results: + Id_of_really_matched_probeset = [] + + for one_result in results: + ProbeSet_Id, ProbeSet_RefSeq_TranscriptId = one_result + multiple_RefSeqId = string.split(string.strip(ProbeSet_RefSeq_TranscriptId), '///') + for one_RefSeqId in multiple_RefSeqId: + tokens = string.split( one_RefSeqId, '.' 
) + one_RefSeqId_without_versionNum = string.strip(tokens[0]) + if one_RefSeqId_without_versionNum == fd.refseq: + Id_of_really_matched_probeset.append( ProbeSet_Id ) + break + + if Id_of_really_matched_probeset: + condition_string = " or ".join(["Id = %s" % one_ID for one_ID in Id_of_really_matched_probeset]) + sqlString = """SELECT ProbeSet.Name from ProbeSet where (%s)""" % condition_string + + self.cursor.execute(sqlString) + else: + pass + + elif fd.genbankid: + self.cursor.execute("""SELECT ProbeSet.Name + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSetFreeze.Name = "%s" AND + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId AND + ProbeSet.GenbankId = "%s" AND + ProbeSet.Id = ProbeSetXRef.ProbeSetId""" % (dbName, fd.genbankid)) + elif fd.geneid: + self.cursor.execute("""SELECT ProbeSet.Name + FROM + ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSetFreeze.Name = "%s" AND + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId AND + ProbeSet.GeneId = "%s" AND + ProbeSet.Id = ProbeSetXRef.ProbeSetId""" % (dbName, fd.geneid)) + else: + continue + + results = self.cursor.fetchall() + if len(results) > 0: + this_tissue_record_count += len(results) + this_species_trait_count += this_tissue_record_count + + for result in results: + _ProbeSetID = result[0] + thisTrait = webqtlTrait(db=thisDB, name=_ProbeSetID, cursor=self.cursor) + results_tr = [] + trId = str(thisTrait) + _traitUrl = thisTrait.genHTML(dispFromDatabase=1) + _traitName = str(thisTrait) + + #ZS: check box column + results_tr.append(TDCell(HT.TD(str(row_count+1), HT.Input(type="checkbox", Class="checkallbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", align="right", Class=className), str(row_count+1), row_count+1)) + row_count += 1 + + #ZS: Tissue column + results_tr.append(TDCell(HT.TD(tissueGrp.capitalize(), Class=className), tissueGrp, tissueGrp)) + + #ZS: Group column + risetUrl = HT.Href(text=thisTrait.riset, 
url="http://www.genenetwork.org/%sCross.html#%s" % (species, thisTrait.riset), target="_blank", Class=className) + results_tr.append(TDCell(HT.TD(risetUrl, Class=className), thisTrait.riset, thisTrait.riset)) + + #ZS: Dataset column + results_tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.db.fullname, url = webqtlConfig.INFOPAGEHREF % thisTrait.db.name, + target='_blank', Class="fs13 fwn non_bold"), Class=className), thisTrait.db.name.upper(), thisTrait.db.name.upper())) + + #ZS: Trait ID column + results_tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.getGivenName(),url="javascript:showDatabase3('%s','%s','%s','')" % ('showDatabase', thisTrait.db.name, thisTrait.name), Class="fs12 fwn"), nowrap="yes",align="left", Class=className),str(thisTrait.name), thisTrait.name)) + + #ZS: Symbol column and Description column + description_string = str(thisTrait.description).strip() + if (thisTrait.db.type == "ProbeSet"): + target_string = str(thisTrait.probe_target_description).strip() + + description_display = '' + + if len(description_string) > 1 and description_string != 'None': + description_display = description_string + else: + description_display = thisTrait.symbol + + if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + description_display = description_display + '; ' + target_string.strip() + + description_string = description_display + else: + results_tr.append(TDCell(HT.TD("--", align="left", Class=className), "--", "Zz")) + + results_tr.append(TDCell(HT.TD(description_string, Class=className), description_string, description_string)) + + #XZ: trait_location_value is used for sorting + trait_location_repr = "--" + trait_location_value = 1000000 + + if hasattr(thisTrait, 'chr') and hasattr(thisTrait, 'mb') and thisTrait.chr and thisTrait.mb: + try: + trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb + except: + if thisTrait.chr.upper() == "X": + trait_location_value = 20*1000 + 
def getTissueSortByValue(self):
    """Return the default sort of the tissue table as (column key, direction)."""
    return ("tissue_group", "up")


def getTraitSortByValue(self):
    """Return the default sort of the trait table as (column key, direction)."""
    return ("tissue", "up")


def getTissueTableHeader(self):
    """Return the header (a list holding one row of THCell) of the tissue table.

    The row has an unsortable blank column followed by the sortable
    'Tissue' and 'Results' columns.
    """
    cellClass = "fs13 fwb ffl b1 cw cbrb"

    blankCell = THCell(HT.TD(' ', Class=cellClass, nowrap="on"), sort=0)
    tissueCell = THCell(HT.TD('Tissue', HT.BR(), HT.BR(), valign="top", Class=cellClass, nowrap="on"),
                        text="tissue_group", idx=1)
    resultsCell = THCell(HT.TD('Results', HT.BR(), HT.BR(), valign="top", Class=cellClass, nowrap="on"),
                         text="results", idx=2)

    return [[blankCell, tissueCell, resultsCell]]


def getTraitTableHeader(self):
    """Return the header (a list holding one row of THCell) of the trait table.

    Each column is described by its title, an optional second title line
    and the key the column is sorted by; the column index is its position.
    """
    cellClass = "fs13 fwb ffl b1 cw cbrb"

    # (title, second line or None, sort key)
    columns = [
        ('Index', None, "index"),
        ('Tissue', None, "tissue"),
        ('Group', None, "group"),
        ('Dataset', None, "dataset"),
        ('Record ID', None, "name"),
        ('Description', None, "desc"),
        ('Location', 'Chr and Mb', "location"),
        ('Mean', 'Expr', "mean"),
    ]

    headerRow = []
    for position, (title, secondLine, sortKey) in enumerate(columns):
        if secondLine is None:
            contents = (title, HT.BR(), HT.BR())
        else:
            contents = (title, HT.BR(), secondLine, HT.BR())
        cell = HT.TD(*contents, valign="top", Class=cellClass, nowrap="on")
        headerRow.append(THCell(cell, text=sortKey, idx=position))

    return [headerRow]
"""Fetch the published phenotypes of one inbred set from the WebQTL database.

Usage: pubmedsearch.py INBRED_SET_ID
"""
import sys
import os
import MySQLdb
import time

db='db_webqtl_leiyan'
author="megan memphis"

# InbredSetId was used in the query below without ever being defined, so the
# script always stopped with a NameError.
# NOTE(review): taking it from the command line is an assumption (sys was
# imported but unused) -- confirm the intended source of this value.
if len(sys.argv) < 2:
    sys.exit("usage: pubmedsearch.py INBRED_SET_ID")
InbredSetId = int(sys.argv[1])

# NOTE(review): database credentials are hard-coded here; consider moving
# them to configuration.
con = MySQLdb.Connect(db=db,user='webqtlupd',passwd='webqtl', host="localhost")
cursor = con.cursor()
# The id is passed as a bound parameter rather than interpolated into the SQL.
cursor.execute('select PhenotypeId, Locus, DataId, Phenotype.Post_publication_description from PublishXRef, Phenotype where PublishXRef.PhenotypeId = Phenotype.Id and InbredSetId=%s', (InbredSetId,))
PublishXRefInfos = cursor.fetchall()