From 06367191cc7179a606c7533cead23c1de127eb13 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 26 Mar 2013 22:18:33 +0000 Subject: Created quick_search_table.py to create table with all of the information needed for the the quick search features (which searches across all datasets) --- wqflask/maintenance/quick_search_table.py | 50 +++++++++++++++++++++++++++++++ wqflask/wqflask/do_search.py | 41 ++++++++++++++++++------- 2 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 wqflask/maintenance/quick_search_table.py diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py new file mode 100644 index 00000000..a337a543 --- /dev/null +++ b/wqflask/maintenance/quick_search_table.py @@ -0,0 +1,50 @@ +from __future__ import print_function, division, absolute_import + +import sys +sys.path.append("../../..") + +import sqlalchemy as sa +from sqlalchemy.dialects import mysql +from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref +from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.ext.declarative import declarative_base + +import zach_settings as zs + +Engine = sa.create_engine(zs.SQLALCHEMY_DATABASE_URI, + #encoding='utf-8', + #client_encoding='utf-8', + echo="debug", + ) + +Session = scoped_session(sessionmaker(bind=Engine)) #, extension=VersionedListener())) +#Xsession = Session() + +Base = declarative_base(bind=Engine) +Metadata = sa.MetaData() +Metadata.bind = Engine + + +class ProbeSet(Base): + __tablename__ = 'ProbeSet' + __table_args__ = {'autoload': True} + +def main(): + for ps in page_query(Session.query(ProbeSet)): #all() + #for ps in probe_sets: + print("--->", ps) + +def page_query(q): + """http://stackoverflow.com/a/1217947/1175849""" + offset = 0 + while True: + r = False + for elem in q.limit(1000).offset(offset): + r = True + yield elem + offset += 1000 + if not r: + break + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py index 1b1b56fb..bbb16054 100644 --- a/wqflask/wqflask/do_search.py +++ b/wqflask/wqflask/do_search.py @@ -13,6 +13,7 @@ sys.path.append("..") from dbFunction import webqtlDatabaseFunction +from utility.benchmark import Bench class DoSearch(object): """Parent class containing parameters/functions used for all searches""" @@ -63,9 +64,9 @@ class DoSearch(object): class QuickMrnaAssaySearch(DoSearch): """A general search for mRNA assays""" - + DoSearch.search_types['quick_mrna_assay'] = "QuickMrnaAssaySearch" - + base_query = """SELECT ProbeSet.Name as ProbeSet_Name, ProbeSet.Symbol as ProbeSet_Symbol, ProbeSet.description as ProbeSet_Description, @@ -73,7 +74,7 @@ class QuickMrnaAssaySearch(DoSearch): ProbeSet.Mb as ProbeSet_Mb, ProbeSet.name_num as ProbeSet_name_num FROM ProbeSet """ - + header_fields = ['', 'Record ID', 'Symbol', @@ -190,6 +191,23 @@ class PhenotypeSearch(DoSearch): 'Max LRS', 'Max LRS Location'] + #def get_fields_clause(self): + # """Generate clause for WHERE portion of query""" + # + # #Todo: Zach will figure out exactly what both these lines mean + # #and comment here + # if "'" not in self.search_term[0]: + # search_term = "[[:<:]]" + self.search_term[0] + "[[:>:]]" + # + # # This adds a clause to the query that matches the search term + # # against each field in the search_fields tuple + # fields_clause = [] + # for field in self.search_fields: + # fields_clause.append('''%s REGEXP "%s"''' % (field, search_term)) + # fields_clause = "(%s) and " % ' OR '.join(fields_clause) + # + # return fields_clause + def get_fields_clause(self): """Generate clause for WHERE portion of query""" @@ -200,13 +218,13 @@ class PhenotypeSearch(DoSearch): # This adds a clause to the query that matches the search term # against each field in the search_fields tuple - fields_clause = [] - for field in self.search_fields: - fields_clause.append('''%s REGEXP "%s"''' % (field, search_term)) - fields_clause = "(%s) and " % ' OR '.join(fields_clause) + fields_clause = "MATCH(" + fields_clause += ",".join(self.search_fields) + ") " + fields_clause += "AGAINST('{}' IN BOOLEAN MODE)".format(self.search_term[0]) return fields_clause + def compile_final_query(self, from_clause = '', where_clause = ''): """Generates the final query string""" @@ -262,13 +280,13 @@ class QuickPhenotypeSearch(PhenotypeSearch): 'Publication.PubMed_ID', 'Publication.Abstract', 'Publication.Title', - 'Publication.Authors') + 'Publication.Authors') def compile_final_query(self, where_clause = ''): """Generates the final query string""" query = (self.base_query + - """WHERE %s + """WHERE (%s) and PublishXRef.PhenotypeId = Phenotype.Id and PublishXRef.PublicationId = Publication.Id and PublishXRef.InbredSetId = InbredSet.Id and @@ -283,7 +301,10 @@ class QuickPhenotypeSearch(PhenotypeSearch): query = self.compile_final_query(where_clause = self.get_fields_clause()) - return self.execute(query) + with Bench("Doing quick phenotype search"): + results = self.execute(query) + + return results class GenotypeSearch(DoSearch): """A search within a genotype dataset""" -- cgit v1.2.3