From a2d766fafb5192a9cb9775c9cc2e7cffe8aad411 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 27 Mar 2013 23:39:47 +0000 Subject: Got quick_search_table.py to a point where it runs; will know tomorrow whether it ran correctly or not --- wqflask/maintenance/quick_search_table.py | 162 ++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 22 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 51426ab2..63fb7da5 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -1,17 +1,34 @@ from __future__ import print_function, division, absolute_import +""" +Results will be returned for each of several trait types: mRNA assays, phenotypes, genotypes, and +(maybe later) genes + +For each trait type, the results for each species should be given; for example, have a "Mouse" tab +with the mouse traits in a table inside it + +This table will then list each trait, its dataset, and several columns determined by its trait type +(phenotype, genotype, etc) + +""" + + import sys sys.path.append("../../..") +import simplejson as json + import sqlalchemy as sa from sqlalchemy.dialects import mysql from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.ext.declarative import declarative_base -import zach_settings as zs +from BeautifulSoup import UnicodeDammit + +import zach_settings as settings -Engine = sa.create_engine(zs.SQLALCHEMY_DATABASE_URI, +Engine = sa.create_engine(settings.SQLALCHEMY_DATABASE_URI, #encoding='utf-8', #client_encoding='utf-8', #echo="debug", @@ -29,20 +46,32 @@ class ProbeSet(Base): __tablename__ = 'ProbeSet' __table_args__ = {'autoload': True} -#QuickSearch = sa.Table("QuickSearch", Metadata, -# sa.Column('table_name', sa.String), -# sa.Column('the_key', sa.String), -# sa.Column('terms', sa.String)) - -class QuickSearch(Base): - table_name = Column(String) - the_key = Column(String) - terms = Column(String) - - def __init__(self, table_name, the_key, terms): - self.table_name = table_name - self.the_key = the_key - self.terms = get_unique_terms(terms) +QuickSearch = sa.Table("QuickSearch", Metadata, + sa.Column('table_name', sa.String(15), + primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from + sa.Column('the_key', sa.String(30), + primary_key=True, nullable=False, autoincrement=False), # key in database table + sa.Column('terms', sa.Text), # terms to compare search string with + sa.Column('species', sa.Text), + sa.Column('result_fields', sa.Text) # json + ) + +#QuickSearch.drop(Engine, checkfirst=True) +Metadata.create_all(Engine) + +#class QuickSearch(Base): +# table_name = Column(String) +# the_key = Column(String) +# terms = Column(String) +# +# def __init__(self, table_name, the_key, terms, category, species, result_fields): +# self.table_name = table_name +# self.the_key = the_key +# self.terms = terms +# self.species = species +# self.category = category +# self.result_fields = json.dumps(sort_keys=True) + def get_unique_terms(*args): if not args: @@ -57,20 +86,109 @@ def get_unique_terms(*args): token = token[1:] if token.endswith((')', ']')): token = token[:-1] + if token.endswith(';'): + token = token[:-1] if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + unique.add(token) print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) - def main(): + conn = Engine.connect() + counter = 0 for ps in page_query(Session.query(ProbeSet)): #all() - terms = get_unique_terms(ps.Name, - ps.Symbol, - ps.description) - - + values = {} + values['table_name'] = "ProbeSet" + values['the_key'] = ps.Id + values['terms'] = get_unique_terms(ps.Name, ps.Symbol, ps.description, ps.alias) + print("terms is:", values['terms']) + values['species'] = get_species("ProbeSet", ps.Id) + values['result_fields'] = get_result_fields("ProbeSet", ps.Id) + ins = QuickSearch.insert().values(**values) + conn.execute(ins) + counter += 1 + print("Done:", counter) + + +def get_species(trait_type, trait_id): + if trait_type == "ProbeSet": + print("Before species query") + results = Session.query("Name").from_statement("SELECT Species.Name " + "FROM ProbeSetXRef, " + "ProbeSetFreeze, " + "ProbeFreeze, " + "InbredSet, " + "Species " + "WHERE ProbeSetXRef.ProbeSetId =:probeset_id and " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and " + "ProbeFreeze.InbredSetId = InbredSet.Id and " + "InbredSet.SpeciesId = Species.Id").params(probeset_id=trait_id).all() + print("After query") + assert len(set([result.Name for result in results])) == 1, "Multiple names?" + + print("species is:", results[0].Name) + + return results[0].Name + +def get_result_fields(trait_type, trait_id): + if trait_type == "ProbeSet": + print("qs1") + results = Session.query( + "name", "symbol", "description", "chr", "mb" + ).from_statement( + "SELECT ProbeSet.Name as name, " + "ProbeSet.Symbol as symbol, " + "ProbeSet.description as description, " + "ProbeSet.Chr as chr, " + "ProbeSet.Mb as mb " + "FROM ProbeSet " + "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=trait_id).all() + print("qs2") + for result in results: + print(result) + assert len(set(result for result in results)) == 1, "Different results" + + print("results are:", results) + result = results[0] + result = row2dict(result) + try: + json_results = json.dumps(result, sort_keys=True) + except UnicodeDecodeError: + print("\n\nTrying to massage unicode\n\n") + #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__)) + #resultd = dict(**result.__dict__) + for key, value in result.iteritems(): + print(" key is:", key) + print(" value is:", value) + if isinstance(value, basestring): + result[key] = value.decode('utf-8', errors='ignore') + json_results = json.dumps(result, sort_keys=True) + + #print("json is: ", json_results) + + return json_results + + +def row2dict(row): + return dict(zip(row.keys(), row)) # http://stackoverflow.com/a/2848519/1175849 + #"""http://stackoverflow.com/a/1960546/1175849""" + #d = {} + #for column in row.__table__.columns: + # d[column.name] = getattr(row, column.name) + # + #return d + def page_query(q): """http://stackoverflow.com/a/1217947/1175849""" offset = 0 -- cgit v1.2.3