diff options
Diffstat (limited to 'web/webqtl')
-rwxr-xr-x | web/webqtl/search/PubmedSearch.py | 94 |
1 files changed, 94 insertions, 0 deletions
# web/webqtl/search/PubmedSearch.py
#
# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by GeneNetwork Core Team 2010/10/20

import re

from dbFunction import webqtlDatabaseFunction

import logging
logging.basicConfig(filename="/tmp/gn_log_leiyan", level=logging.INFO)
_log = logging.getLogger("PubmedSearch")

#########################################
# Example of the directives recognised inside a search string:
#      name=megan inst=washington
#########################################

class PubmedSearch:
    """Expand ``name=`` / ``inst=`` directives of a search string into gene symbols.

    The constructor looks for a ``name=...`` and an ``inst=...`` directive
    (``:`` is accepted instead of ``=``; the value is either a single word or
    a parenthesised, whitespace-separated list of words).  Every keyword is
    matched with a boolean-mode full-text search against the ``pubmedsearch``
    table, and the symbols of the matching probe sets of the given
    ProbeSetFreeze are prepended to the remaining search text.

    Attributes:
        olds: the search string exactly as it was passed in.
        news: the rewritten search string (equal to ``olds`` when no
            directive was found or no database cursor was available).
    """

    # Compiled once for the class instead of on every construction.
    _NAME_PATTERN = re.compile(r'\s*name\s*[:=]((\s*\(.+?\)\s*)|(\s*\S+\s*))', re.I)
    _INST_PATTERN = re.compile(r'\s*inst\s*[:=]((\s*\(.+?\)\s*)|(\s*\S+\s*))', re.I)

    # One clause is emitted per keyword; the keyword itself is bound as a
    # query parameter rather than pasted into the SQL text.
    _NAME_CLAUSE = ("(MATCH (pubmedsearch.authorfullname,authorshortname) "
                    "AGAINST (%s IN BOOLEAN MODE)) AND ")
    _INST_CLAUSE = ("(MATCH (pubmedsearch.institute) "
                    "AGAINST (%s IN BOOLEAN MODE)) AND ")

    def __init__(self, s, ProbeSetFreezeId):
        """Rewrite the search string ``s`` for the data set ``ProbeSetFreezeId``.

        Args:
            s: the raw search string typed by the user.
            ProbeSetFreezeId: numeric id used to restrict the lookup to one
                ProbeSetFreeze; it is converted with ``int()``.
        """
        # Set the attributes first so that getNewS() is usable even when no
        # database cursor can be obtained.
        self.olds = s
        self.news = s

        cursor = webqtlDatabaseFunction.getCursor()
        if not cursor:
            return

        clauses = []
        params = []

        found_name = self._consume_directive(
            self._NAME_PATTERN, self._NAME_CLAUSE, clauses, params)
        _log.info("news_1: %s", self.news)

        found_inst = self._consume_directive(
            self._INST_PATTERN, self._INST_CLAUSE, clauses, params)
        _log.info("news_2: %s", self.news)

        if not (found_name or found_inst):
            # Nothing to expand: hand back the untouched search string.
            self.news = self.olds
            return

        sql = ("SELECT ProbeSet.Symbol FROM pubmedsearch,ProbeSet,ProbeSetXRef WHERE "
               + "".join(clauses)
               + "pubmedsearch.geneid=ProbeSet.GeneId AND "
               + "ProbeSet.Id=ProbeSetXRef.ProbeSetId AND "
               + "ProbeSetXRef.ProbeSetFreezeId=%s "
               + "GROUP BY ProbeSet.Symbol;")
        params.append(int(ProbeSetFreezeId))
        _log.info("sql: %s params: %r", sql, params)

        # NOTE(review): relies on the cursor using the DB-API "format"
        # paramstyle (%s placeholders), as MySQLdb does - confirm against
        # webqtlDatabaseFunction.getCursor().
        cursor.execute(sql, tuple(params))

        # Rows whose Symbol is NULL cannot be turned into a search term.
        symbols = "".join(row[0] + ' '
                          for row in cursor.fetchall()
                          if row[0] is not None)
        self.news = symbols + self.news
        _log.info("symbols2: %s", symbols)

    def _consume_directive(self, pattern, clause, clauses, params):
        """Remove one directive from ``self.news`` and record its keywords.

        For every keyword of the directive matched by ``pattern`` a copy of
        ``clause`` is appended to ``clauses`` and the keyword to ``params``.

        Returns:
            True when the directive was present in ``self.news``.
        """
        match = pattern.search(self.news)
        if not match:
            return False

        # Drop the directive text from what is left of the search string.
        self.news = self.news.replace(match.group(), ' ')

        value = match.group(1).strip()
        value = value.strip('(').strip(')').strip()
        for keyword in value.split():
            clauses.append(clause)
            params.append(keyword)
        return True

    def getNewS(self):
        """Return the rewritten search string."""
        return self.news
\ No newline at end of file |