aboutsummaryrefslogtreecommitdiff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
authorZachary Sloan2013-03-28 21:11:37 +0000
committerZachary Sloan2013-03-28 21:11:37 +0000
commit38a2c6e46f467f7f2b7e82873ab8abfe61096f9b (patch)
treead95e961035bb4dab0dbebc80a64b08b3b2f8fef /wqflask/maintenance
parent6952803ae8772cb8958aaf206689c61318bbd128 (diff)
downloadgenenetwork2-38a2c6e46f467f7f2b7e82873ab8abfe61096f9b.tar.gz
quick_search_table.py now seems to run correctly for ProbeSetXRef
rows
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r--wqflask/maintenance/quick_search_table.py291
1 files changed, 227 insertions, 64 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 132d1b72..60446f79 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -42,10 +42,170 @@ Metadata = sa.MetaData()
Metadata.bind = Engine
-class ProbeSet(Base):
+class ProbeSetXRef(Base):
__tablename__ = 'ProbeSetXRef'
- __table_args__ = {'autoload': True}
+ ProbeSetFreezeId = sa.Column(sa.Integer, primary_key=True)
+ ProbeSetId = sa.Column(sa.Integer, primary_key=True)
+ DataId = sa.Column(sa.Integer, unique=True)
+ Locus_old = sa.Column(sa.Text)
+ LRS_old = sa.Column(sa.Float)
+ pValue_old = sa.Column(sa.Float)
+ mean = sa.Column(sa.Float)
+ se = sa.Column(sa.Float)
+ Locus = sa.Column(sa.Text)
+ LRS = sa.Column(sa.Float)
+ pValue = sa.Column(sa.Float)
+ additive = sa.Column(sa.Float)
+ h2 = sa.Column(sa.Float)
+
+ #__mapper_args__ = {'primary_key':[ProbeSetXRef.ProbeSetId, ProbeSetXRef.ProbeSetFreezeId]}
+
+ @classmethod
+ def run(cls):
+ conn = Engine.connect()
+ counter = 0
+ for ps in page_query(Session.query(cls)): #all()
+ values = {}
+ values['table_name'] = cls.__tablename__
+ values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
+ values['terms'] = cls.get_unique_terms(ps.ProbeSetId)
+ print("terms is:", values['terms'])
+ #values['species'] = get_species("ProbeSet", ps.Id)
+ values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId)
+ ins = QuickSearch.insert().values(**values)
+ conn.execute(ins)
+ counter += 1
+ print("Done:", counter)
+
+ @staticmethod
+ def get_unique_terms(probeset_id):
+ results = Session.query(
+ "name",
+ "symbol",
+ "description",
+ "alias"
+ ).from_statement(
+ "SELECT ProbeSet.Name as name, "
+ "ProbeSet.Symbol as symbol, "
+ "ProbeSet.description as description, "
+ "ProbeSet.alias as alias "
+ "FROM ProbeSet "
+ "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all()
+
+ unique = set()
+ for item in results[0]:
+ #print("locals:", locals())
+ if not item:
+ continue
+ for token in item.split():
+ if token.startswith(('(','[')):
+ token = token[1:]
+ if token.endswith((')', ']')):
+ token = token[:-1]
+ if token.endswith(';'):
+ token = token[:-1]
+ if len(token) > 2:
+ try:
+ # This hopefully ensures that the token is utf-8
+ token = token.encode('utf-8')
+ print(" ->", token)
+ except UnicodeDecodeError:
+ print("\n-- UDE \n")
+ # Can't get it into utf-8, we won't use it
+ continue
+
+ unique.add(token)
+ print("\nUnique terms are: {}\n".format(unique))
+ return " ".join(unique)
+
+ #def get_species(dataset_id):
+ # print("Before species query")
+ # results = Session.query("Name").from_statement("SELECT Species.Name "
+ # "FROM ProbeSetXRef, "
+ # "ProbeSetFreeze, "
+ # "ProbeFreeze, "
+ # "InbredSet, "
+ # "Species "
+ # "WHERE ProbeSetFreeze.Id =:probeset_freeze_id and "
+ # "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and "
+ # "ProbeFreeze.InbredSetId = InbredSet.Id and "
+ # "InbredSet.SpeciesId = Species.Id").params(probeset_freeze_id=dataset_id).all()
+ # print("After query")
+ #
+ # assert len(set([result.Name for result in results])) == 1, "Multiple names?"
+ #
+ # print("species is:", results[0].Name)
+ #
+ # return results[0].Name
+
+ @staticmethod
+ def get_result_fields(probeset_id, dataset_id):
+ results = Session.query(
+ "name",
+ "species",
+ "dataset",
+ "dataset_name",
+ "symbol",
+ "description",
+ "chr", "mb",
+ "lrs",
+ "genbank_id",
+ "gene_id",
+ "chip_id",
+ "chip_name"
+ ).from_statement(
+ "SELECT ProbeSet.Name as name, "
+ "Species.Name as species, "
+ "ProbeSetFreeze.Name as dataset, "
+ "ProbeSetFreeze.FullName as dataset_name, "
+ "ProbeSet.Symbol as symbol, "
+ "ProbeSet.description as description, "
+ "ProbeSet.Chr as chr, "
+ "ProbeSet.Mb as mb, "
+ "ProbeSetXRef.LRS as lrs, "
+ "ProbeSet.GenbankId as genbank_id, "
+ "ProbeSet.GeneId as gene_id, "
+ "ProbeSet.ChipId as chip_id, "
+ "GeneChip.Name as chip_name "
+ "FROM ProbeSet, "
+ "ProbeSetXRef, "
+ "ProbeSetFreeze, "
+ "ProbeFreeze, "
+ "InbredSet, "
+ "Species, "
+ "GeneChip "
+ "WHERE ProbeSetXRef.ProbeSetId = :probeset_id and "
+ "ProbeSetXRef.ProbeSetFreezeId = :dataset_id and "
+ "ProbeSetXRef.ProbeSetId = ProbeSet.Id and "
+ "ProbeSet.ChipId = GeneChip.Id and "
+ "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and "
+ "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and "
+ "ProbeFreeze.InbredSetId = InbredSet.Id and "
+ "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id,
+ dataset_id=dataset_id).all()
+ for result in results:
+ print(result)
+ assert len(set(result for result in results)) == 1, "Different results"
+
+ print("results are:", results)
+ result = results[0]
+ result = row2dict(result)
+ try:
+ json_results = json.dumps(result, sort_keys=True)
+ except UnicodeDecodeError:
+ print("\n\nTrying to massage unicode\n\n")
+ for key, value in result.iteritems():
+ print("\tkey is:", key)
+ print("\tvalue is:", value)
+ if isinstance(value, basestring):
+ result[key] = value.decode('utf-8', errors='ignore')
+ json_results = json.dumps(result, sort_keys=True)
+
+ #print("json is: ", json_results)
+
+ return json_results
+
QuickSearch = sa.Table("QuickSearch", Metadata,
sa.Column('table_name', sa.String(15),
primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from
@@ -77,7 +237,7 @@ def get_unique_terms(trait_type, trait_id):
#if not args:
# return None
- if trait_type="ProbeSet":
+ if trait_type=="ProbeSet":
results = Session.query(
"name",
"symbol",
@@ -120,18 +280,21 @@ def get_unique_terms(trait_type, trait_id):
def main():
conn = Engine.connect()
counter = 0
- for ps in page_query(Session.query(ProbeSet)): #all()
- values = {}
- values['table_name'] = "ProbeSetXRef"
- values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
- values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId)
- print("terms is:", values['terms'])
- #values['species'] = get_species("ProbeSet", ps.Id)
- values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId)
- ins = QuickSearch.insert().values(**values)
- conn.execute(ins)
- counter += 1
- print("Done:", counter)
+
+ ProbeSetXRef.run()
+
+ #for ps in page_query(Session.query(ProbeSet)): #all()
+ # values = {}
+ # values['table_name'] = "ProbeSetXRef"
+ # values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
+ # values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId)
+ # print("terms is:", values['terms'])
+ # #values['species'] = get_species("ProbeSet", ps.Id)
+ # values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId)
+ # ins = QuickSearch.insert().values(**values)
+ # conn.execute(ins)
+ # counter += 1
+ # print("Done:", counter)
def get_species(trait_type, trait_id):
@@ -156,55 +319,55 @@ def get_species(trait_type, trait_id):
return results[0].Name
-def get_result_fields(trait_type, *args):
- if trait_type == "ProbeSet":
- print("qs1")
- results = Session.query(
- "name",
- "symbol",
- "description",
- "chr", "mb",
- "genbank_id",
- "gene_id",
- "chip_id",
- "chip_name"
- ).from_statement(
- "SELECT ProbeSet.Name as name, "
- "ProbeSet.Symbol as symbol, "
- "ProbeSet.description as description, "
- "ProbeSet.Chr as chr, "
- "ProbeSet.Mb as mb, "
- "ProbeSet.GenbankId as genbank_id, "
- "ProbeSet.GeneId as gene_id, "
- "ProbeSet.ChipId as chip_id, "
- "GeneChip.Name as chip_name "
- "FROM ProbeSet, GeneChip "
- "WHERE ProbeSet.ChipId = GeneChip.Id and "
- "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all()
- print("qs2")
- for result in results:
- print(result)
- assert len(set(result for result in results)) == 1, "Different results"
-
- print("results are:", results)
- result = results[0]
- result = row2dict(result)
- try:
- json_results = json.dumps(result, sort_keys=True)
- except UnicodeDecodeError:
- print("\n\nTrying to massage unicode\n\n")
- #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__))
- #resultd = dict(**result.__dict__)
- for key, value in result.iteritems():
- print(" key is:", key)
- print(" value is:", value)
- if isinstance(value, basestring):
- result[key] = value.decode('utf-8', errors='ignore')
- json_results = json.dumps(result, sort_keys=True)
-
- #print("json is: ", json_results)
-
- return json_results
+#def get_result_fields(trait_type, *args):
+# if trait_type == "ProbeSet":
+# print("qs1")
+# results = Session.query(
+# "name",
+# "symbol",
+# "description",
+# "chr", "mb",
+# "genbank_id",
+# "gene_id",
+# "chip_id",
+# "chip_name"
+# ).from_statement(
+# "SELECT ProbeSet.Name as name, "
+# "ProbeSet.Symbol as symbol, "
+# "ProbeSet.description as description, "
+# "ProbeSet.Chr as chr, "
+# "ProbeSet.Mb as mb, "
+# "ProbeSet.GenbankId as genbank_id, "
+# "ProbeSet.GeneId as gene_id, "
+# "ProbeSet.ChipId as chip_id, "
+# "GeneChip.Name as chip_name "
+# "FROM ProbeSet, GeneChip "
+# "WHERE ProbeSet.ChipId = GeneChip.Id and "
+# "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all()
+# print("qs2")
+# for result in results:
+# print(result)
+# assert len(set(result for result in results)) == 1, "Different results"
+#
+# print("results are:", results)
+# result = results[0]
+# result = row2dict(result)
+# try:
+# json_results = json.dumps(result, sort_keys=True)
+# except UnicodeDecodeError:
+# print("\n\nTrying to massage unicode\n\n")
+# #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__))
+# #resultd = dict(**result.__dict__)
+# for key, value in result.iteritems():
+# print(" key is:", key)
+# print(" value is:", value)
+# if isinstance(value, basestring):
+# result[key] = value.decode('utf-8', errors='ignore')
+# json_results = json.dumps(result, sort_keys=True)
+#
+# #print("json is: ", json_results)
+#
+# return json_results
def row2dict(row):