aboutsummaryrefslogtreecommitdiff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
authorZachary Sloan2013-03-27 23:39:47 +0000
committerZachary Sloan2013-03-27 23:39:47 +0000
commita2d766fafb5192a9cb9775c9cc2e7cffe8aad411 (patch)
treeb74c0c44c126beaafc9ffbfe250a4455c69c2d40 /wqflask/maintenance
parent2ead7d163451f17775ff99f048f66c17d2a7f20f (diff)
downloadgenenetwork2-a2d766fafb5192a9cb9775c9cc2e7cffe8aad411.tar.gz
Got quick_search_table.py to a point where it runs; will
know tomorrow whether it ran correctly or not
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/quick_search_table.py162
1 files changed, 140 insertions, 22 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 51426ab2..63fb7da5 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -1,17 +1,34 @@
from __future__ import print_function, division, absolute_import
+"""
+Results will be returned for each of several trait types: mRNA assays, phenotypes, genotypes, and
+(maybe later) genes
+
+For each trait type, the results for each species should be given; for example, have a "Mouse" tab
+with the mouse traits in a table inside it
+
+This table will then list each trait, its dataset, and several columns determined by its trait type
+(phenotype, genotype, etc)
+
+"""
+
+
import sys
sys.path.append("../../..")
+import simplejson as json
+
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.ext.declarative import declarative_base
-import zach_settings as zs
+from BeautifulSoup import UnicodeDammit
+
+import zach_settings as settings
-Engine = sa.create_engine(zs.SQLALCHEMY_DATABASE_URI,
+Engine = sa.create_engine(settings.SQLALCHEMY_DATABASE_URI,
#encoding='utf-8',
#client_encoding='utf-8',
#echo="debug",
@@ -29,20 +46,32 @@ class ProbeSet(Base):
__tablename__ = 'ProbeSet'
__table_args__ = {'autoload': True}
-#QuickSearch = sa.Table("QuickSearch", Metadata,
-# sa.Column('table_name', sa.String),
-# sa.Column('the_key', sa.String),
-# sa.Column('terms', sa.String))
-
-class QuickSearch(Base):
- table_name = Column(String)
- the_key = Column(String)
- terms = Column(String)
-
- def __init__(self, table_name, the_key, terms):
- self.table_name = table_name
- self.the_key = the_key
- self.terms = get_unique_terms(terms)
+QuickSearch = sa.Table("QuickSearch", Metadata,
+ sa.Column('table_name', sa.String(15),
+ primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from
+ sa.Column('the_key', sa.String(30),
+ primary_key=True, nullable=False, autoincrement=False), # key in database table
+ sa.Column('terms', sa.Text), # terms to compare search string with
+ sa.Column('species', sa.Text),
+ sa.Column('result_fields', sa.Text) # json
+ )
+
+#QuickSearch.drop(Engine, checkfirst=True)
+Metadata.create_all(Engine)
+
+#class QuickSearch(Base):
+# table_name = Column(String)
+# the_key = Column(String)
+# terms = Column(String)
+#
+# def __init__(self, table_name, the_key, terms, category, species, result_fields):
+# self.table_name = table_name
+# self.the_key = the_key
+# self.terms = terms
+# self.species = species
+# self.category = category
+# self.result_fields = json.dumps(sort_keys=True)
+
def get_unique_terms(*args):
if not args:
@@ -57,20 +86,109 @@ def get_unique_terms(*args):
token = token[1:]
if token.endswith((')', ']')):
token = token[:-1]
+ if token.endswith(';'):
+ token = token[:-1]
if len(token) > 2:
+ try:
+ # This hopefully ensures that the token is utf-8
+ token = token.encode('utf-8')
+ print(" ->", token)
+ except UnicodeDecodeError:
+ print("\n-- UDE \n")
+ # Can't get it into utf-8, we won't use it
+ continue
+
unique.add(token)
print("\nUnique terms are: {}\n".format(unique))
return " ".join(unique)
-
def main():
+ conn = Engine.connect()
+ counter = 0
for ps in page_query(Session.query(ProbeSet)): #all()
- terms = get_unique_terms(ps.Name,
- ps.Symbol,
- ps.description)
-
-
+ values = {}
+ values['table_name'] = "ProbeSet"
+ values['the_key'] = ps.Id
+ values['terms'] = get_unique_terms(ps.Name, ps.Symbol, ps.description, ps.alias)
+ print("terms is:", values['terms'])
+ values['species'] = get_species("ProbeSet", ps.Id)
+ values['result_fields'] = get_result_fields("ProbeSet", ps.Id)
+ ins = QuickSearch.insert().values(**values)
+ conn.execute(ins)
+ counter += 1
+ print("Done:", counter)
+
+
+def get_species(trait_type, trait_id):
+ if trait_type == "ProbeSet":
+ print("Before species query")
+ results = Session.query("Name").from_statement("SELECT Species.Name "
+ "FROM ProbeSetXRef, "
+ "ProbeSetFreeze, "
+ "ProbeFreeze, "
+ "InbredSet, "
+ "Species "
+ "WHERE ProbeSetXRef.ProbeSetId =:probeset_id and "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and "
+ "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and "
+ "ProbeFreeze.InbredSetId = InbredSet.Id and "
+ "InbredSet.SpeciesId = Species.Id").params(probeset_id=trait_id).all()
+ print("After query")
+ assert len(set([result.Name for result in results])) == 1, "Multiple names?"
+
+ print("species is:", results[0].Name)
+
+ return results[0].Name
+
+def get_result_fields(trait_type, trait_id):
+ if trait_type == "ProbeSet":
+ print("qs1")
+ results = Session.query(
+ "name", "symbol", "description", "chr", "mb"
+ ).from_statement(
+ "SELECT ProbeSet.Name as name, "
+ "ProbeSet.Symbol as symbol, "
+ "ProbeSet.description as description, "
+ "ProbeSet.Chr as chr, "
+ "ProbeSet.Mb as mb "
+ "FROM ProbeSet "
+ "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=trait_id).all()
+ print("qs2")
+ for result in results:
+ print(result)
+ assert len(set(result for result in results)) == 1, "Different results"
+
+ print("results are:", results)
+ result = results[0]
+ result = row2dict(result)
+ try:
+ json_results = json.dumps(result, sort_keys=True)
+ except UnicodeDecodeError:
+ print("\n\nTrying to massage unicode\n\n")
+ #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__))
+ #resultd = dict(**result.__dict__)
+ for key, value in result.iteritems():
+ print(" key is:", key)
+ print(" value is:", value)
+ if isinstance(value, basestring):
+ result[key] = value.decode('utf-8', errors='ignore')
+ json_results = json.dumps(result, sort_keys=True)
+
+ #print("json is: ", json_results)
+
+ return json_results
+
+
+def row2dict(row):
+ return dict(zip(row.keys(), row)) # http://stackoverflow.com/a/2848519/1175849
+ #"""http://stackoverflow.com/a/1960546/1175849"""
+ #d = {}
+ #for column in row.__table__.columns:
+ # d[column.name] = getattr(row, column.name)
+ #
+ #return d
+
def page_query(q):
"""http://stackoverflow.com/a/1217947/1175849"""
offset = 0