web/webqtl/search/PubmedSearch.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by GeneNetwork Core Team 2010/10/20

import re

from dbFunction import webqtlDatabaseFunction

import logging
# Module-level logging setup, executed once when this module is imported:
# ask the root logger to write records of level INFO and above to a fixed
# file under /tmp.
# NOTE(review): this is an import-time side effect on the root logger --
# confirm it is still wanted outside of debugging sessions.
logging.basicConfig(level=logging.INFO, filename="/tmp/gn_log_leiyan")

# Named logger used for the diagnostic messages emitted by PubmedSearch.
_log = logging.getLogger("PubmedSearch")

#########################################
# Example search syntax handled below: name=megan inst=washington
#########################################

class PubmedSearch:
    """Rewrite a search string by expanding PubMed author/institute terms.

    The search string may contain a ``name`` term and/or an ``inst`` term,
    written as ``name=value``, ``name:value`` or ``name=(several words)``
    (field names are matched case-insensitively).  Each word of a ``name``
    term is matched against ``pubmedsearch.authorfullname`` /
    ``authorshortname`` and each word of an ``inst`` term against
    ``pubmedsearch.institute`` using a boolean-mode full-text search.  The
    gene symbols found for the given ``ProbeSetFreezeId`` replace the
    ``name``/``inst`` terms at the front of the rewritten string.

    Attributes:
        olds: the search string exactly as passed in.
        news: the rewritten search string; equal to ``olds`` when the string
            has no ``name``/``inst`` term or no database cursor is available.
    """

    # "<field>:<value>" or "<field>=<value>", where <value> is either a
    # parenthesised group of words or a single whitespace-free token.
    _NAME_PATTERN = re.compile(r'\s*name\s*[:=]((\s*\(.+?\)\s*)|(\s*\S+\s*))', re.I)
    _INST_PATTERN = re.compile(r'\s*inst\s*[:=]((\s*\(.+?\)\s*)|(\s*\S+\s*))', re.I)

    # One WHERE condition per keyword; the keyword is supplied as a bound
    # parameter rather than being pasted into the statement.
    _NAME_CLAUSE = "(MATCH (pubmedsearch.authorfullname,authorshortname) AGAINST (%s IN BOOLEAN MODE))"
    _INST_CLAUSE = "(MATCH (pubmedsearch.institute) AGAINST (%s IN BOOLEAN MODE))"

    def __init__(self, s, ProbeSetFreezeId):
        """Parse ``s`` and, if it has name/inst terms, look up gene symbols.

        Args:
            s: the raw search string.
            ProbeSetFreezeId: integer id used to restrict the lookup via
                ``ProbeSetXRef.ProbeSetFreezeId``.
        """
        # Set both attributes before touching the database so that getNewS()
        # works even when no cursor can be obtained.
        self.olds = s
        self.news = s

        cursor = webqtlDatabaseFunction.getCursor()
        if not cursor:
            return

        has_name, remaining, name_words = self._extract_keywords(self._NAME_PATTERN, s)
        _log.info("news_1: %s", remaining)
        has_inst, remaining, inst_words = self._extract_keywords(self._INST_PATTERN, remaining)
        _log.info("news_2: %s", remaining)

        if not (has_name or has_inst):
            # Nothing to expand: the search string is left untouched.
            return

        conditions = [self._NAME_CLAUSE] * len(name_words)
        conditions += [self._INST_CLAUSE] * len(inst_words)
        conditions.append("pubmedsearch.geneid=ProbeSet.GeneId")
        conditions.append("ProbeSet.Id=ProbeSetXRef.ProbeSetId")
        conditions.append("ProbeSetXRef.ProbeSetFreezeId=%s")
        params = name_words + inst_words + [int(ProbeSetFreezeId)]

        sql = ("SELECT ProbeSet.Symbol FROM pubmedsearch,ProbeSet,ProbeSetXRef WHERE "
               + " AND ".join(conditions)
               + " GROUP BY ProbeSet.Symbol;")
        _log.info("sql: %s", sql)
        _log.info("params: %r", params)

        # NOTE(review): assumes the cursor follows the DB-API "format"
        # paramstyle (%s placeholders), as MySQLdb cursors do -- confirm
        # against webqtlDatabaseFunction.getCursor().
        cursor.execute(sql, tuple(params))
        rows = cursor.fetchall()

        # Skip NULL/empty symbols, which cannot be concatenated into the
        # search string.
        symbols = ''.join([row[0] + ' ' for row in rows if row[0]])
        _log.info("symbols2: %s", symbols)
        self.news = symbols + remaining

    @staticmethod
    def _extract_keywords(pattern, text):
        """Cut the first term matching ``pattern`` out of ``text``.

        Args:
            pattern: compiled field pattern whose group 1 is the term value.
            text: the search string to scan.

        Returns:
            A ``(found, remaining, keywords)`` tuple: whether a term was
            found, ``text`` with every occurrence of the matched term
            replaced by a single space, and the whitespace-separated words
            of the term value with surrounding parentheses removed.
        """
        match = pattern.search(text)
        if not match:
            return False, text, []
        remaining = text.replace(match.group(), ' ')
        value = match.group(1).strip().strip('(').strip(')').strip()
        return True, remaining, value.split()

    def getNewS(self):
        """Return the rewritten search string."""
        return self.news