quick_search_table.py now seems to run correctly for ProbeSetXRef

rows
author: Zachary Sloan 2013-03-28 21:11:37 +0000
committer: Zachary Sloan 2013-03-28 21:11:37 +0000
commit: 38a2c6e46f467f7f2b7e82873ab8abfe61096f9b (patch)
tree: ad95e961035bb4dab0dbebc80a64b08b3b2f8fef /wqflask
parent: 6952803ae8772cb8958aaf206689c61318bbd128 (diff)
download: genenetwork2-38a2c6e46f467f7f2b7e82873ab8abfe61096f9b.tar.gz
1 files changed, 227 insertions, 64 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 132d1b72..60446f79 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -42,10 +42,170 @@ Metadata = sa.MetaData()
 Metadata.bind = Engine
 
 
-class ProbeSet(Base):
+class ProbeSetXRef(Base):
     __tablename__ = 'ProbeSetXRef'
-    __table_args__ = {'autoload': True}
     
+    ProbeSetFreezeId = sa.Column(sa.Integer, primary_key=True)
+    ProbeSetId = sa.Column(sa.Integer, primary_key=True)
+    DataId = sa.Column(sa.Integer, unique=True)
+    Locus_old = sa.Column(sa.Text)
+    LRS_old = sa.Column(sa.Float)
+    pValue_old = sa.Column(sa.Float)
+    mean = sa.Column(sa.Float)
+    se = sa.Column(sa.Float)
+    Locus = sa.Column(sa.Text)
+    LRS = sa.Column(sa.Float)
+    pValue = sa.Column(sa.Float)
+    additive = sa.Column(sa.Float)
+    h2 = sa.Column(sa.Float)
+
+    #__mapper_args__ = {'primary_key':[ProbeSetXRef.ProbeSetId, ProbeSetXRef.ProbeSetFreezeId]}
+
+    @classmethod
+    def run(cls):
+        conn = Engine.connect()
+        counter = 0
+        for ps in page_query(Session.query(cls)):   #all()
+            values = {}
+            values['table_name'] = cls.__tablename__
+            values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
+            values['terms'] = cls.get_unique_terms(ps.ProbeSetId)
+            print("terms is:", values['terms'])
+            #values['species'] = get_species("ProbeSet", ps.Id)
+            values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId)
+            ins = QuickSearch.insert().values(**values)
+            conn.execute(ins)
+            counter += 1
+            print("Done:", counter)        
+    
+    @staticmethod
+    def get_unique_terms(probeset_id):
+        results = Session.query(
+                "name",
+                "symbol",
+                "description",
+                "alias"
+            ).from_statement(
+                "SELECT ProbeSet.Name as name, "
+                "ProbeSet.Symbol as symbol, "
+                "ProbeSet.description as description, "
+                "ProbeSet.alias as alias "
+                "FROM ProbeSet "
+                "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all()
+        
+        unique = set()
+        for item in results[0]:
+            #print("locals:", locals())
+            if not item:
+                continue
+            for token in item.split():
+                if token.startswith(('(','[')):
+                    token = token[1:]
+                if token.endswith((')', ']')):
+                    token = token[:-1]
+                if token.endswith(';'):
+                    token = token[:-1]
+                if len(token) > 2:
+                    try:
+                        # This hopefully ensures that the token is utf-8
+                        token = token.encode('utf-8')
+                        print(" ->", token)
+                    except UnicodeDecodeError:
+                        print("\n-- UDE \n")
+                        # Can't get it into utf-8, we won't use it
+                        continue 
+                    
+                    unique.add(token)
+        print("\nUnique terms are: {}\n".format(unique))
+        return " ".join(unique)
+
+    #def get_species(dataset_id):
+    #    print("Before species query")
+    #    results = Session.query("Name").from_statement("SELECT Species.Name "
+    #                "FROM ProbeSetXRef, "
+    #                "ProbeSetFreeze, "
+    #                "ProbeFreeze, "
+    #                "InbredSet, "
+    #                "Species "
+    #                "WHERE ProbeSetFreeze.Id =:probeset_freeze_id and "
+    #                "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and "
+    #                "ProbeFreeze.InbredSetId = InbredSet.Id and "
+    #                "InbredSet.SpeciesId = Species.Id").params(probeset_freeze_id=dataset_id).all()
+    #    print("After query")
+    #
+    #    assert len(set([result.Name for result in results])) == 1, "Multiple names?"
+    #
+    #    print("species is:", results[0].Name)
+    #
+    #    return results[0].Name
+
+    @staticmethod
+    def get_result_fields(probeset_id, dataset_id):
+        results = Session.query(
+                "name",
+                "species",
+                "dataset",
+                "dataset_name",
+                "symbol",
+                "description",
+                "chr", "mb",
+                "lrs",
+                "genbank_id",
+                "gene_id",
+                "chip_id",
+                "chip_name"
+            ).from_statement(
+                "SELECT ProbeSet.Name as name, "
+                "Species.Name as species, "
+                "ProbeSetFreeze.Name as dataset, "
+                "ProbeSetFreeze.FullName as dataset_name, "
+                "ProbeSet.Symbol as symbol, "
+                "ProbeSet.description as description, "
+                "ProbeSet.Chr as chr, "
+                "ProbeSet.Mb as mb, "
+                "ProbeSetXRef.LRS as lrs, "
+                "ProbeSet.GenbankId as genbank_id, "
+                "ProbeSet.GeneId as gene_id, "
+                "ProbeSet.ChipId as chip_id, "
+                "GeneChip.Name as chip_name "
+                "FROM ProbeSet, "
+                "ProbeSetXRef, "
+                "ProbeSetFreeze, "
+                "ProbeFreeze, "
+                "InbredSet, "
+                "Species, "
+                "GeneChip "
+                "WHERE ProbeSetXRef.ProbeSetId = :probeset_id and "
+                "ProbeSetXRef.ProbeSetFreezeId = :dataset_id and "
+                "ProbeSetXRef.ProbeSetId = ProbeSet.Id and "
+                "ProbeSet.ChipId = GeneChip.Id and "
+                "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and "
+                "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and "
+                "ProbeFreeze.InbredSetId = InbredSet.Id and "
+                "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id,
+                                                                    dataset_id=dataset_id).all()
+        for result in results:
+            print(result)
+        assert len(set(result for result in results)) == 1, "Different results"
+        
+        print("results are:", results)
+        result = results[0]
+        result = row2dict(result)
+        try:
+            json_results = json.dumps(result, sort_keys=True)
+        except UnicodeDecodeError:
+            print("\n\nTrying to massage unicode\n\n")
+            for key, value in result.iteritems():
+                print("\tkey is:", key)
+                print("\tvalue is:", value)
+                if isinstance(value, basestring):
+                    result[key] = value.decode('utf-8', errors='ignore')
+            json_results = json.dumps(result, sort_keys=True)
+        
+        #print("json is: ", json_results)
+        
+        return json_results    
+
 QuickSearch = sa.Table("QuickSearch", Metadata,
         sa.Column('table_name', sa.String(15),
                   primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from
@@ -77,7 +237,7 @@ def get_unique_terms(trait_type, trait_id):
     #if not args:
     #    return None
     
-    if trait_type="ProbeSet":
+    if trait_type=="ProbeSet":
         results = Session.query(
                 "name",
                 "symbol",
@@ -120,18 +280,21 @@ def get_unique_terms(trait_type, trait_id):
 def main():
     conn = Engine.connect()
     counter = 0
-    for ps in page_query(Session.query(ProbeSet)):   #all()
-        values = {}
-        values['table_name'] = "ProbeSetXRef"
-        values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
-        values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId)
-        print("terms is:", values['terms'])
-        #values['species'] = get_species("ProbeSet", ps.Id)
-        values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId)
-        ins = QuickSearch.insert().values(**values)
-        conn.execute(ins)
-        counter += 1
-        print("Done:", counter)
+    
+    ProbeSetXRef.run()
+    
+    #for ps in page_query(Session.query(ProbeSet)):   #all()
+    #    values = {}
+    #    values['table_name'] = "ProbeSetXRef"
+    #    values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
+    #    values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId)
+    #    print("terms is:", values['terms'])
+    #    #values['species'] = get_species("ProbeSet", ps.Id)
+    #    values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId)
+    #    ins = QuickSearch.insert().values(**values)
+    #    conn.execute(ins)
+    #    counter += 1
+    #    print("Done:", counter)
 
 
 def get_species(trait_type, trait_id):
@@ -156,55 +319,55 @@ def get_species(trait_type, trait_id):
 
     return results[0].Name
 
-def get_result_fields(trait_type, *args):
-    if trait_type == "ProbeSet":
-        print("qs1")
-        results = Session.query(
-                "name",
-                "symbol",
-                "description",
-                "chr", "mb",
-                "genbank_id",
-                "gene_id",
-                "chip_id",
-                "chip_name"
-            ).from_statement(
-                "SELECT ProbeSet.Name as name, "
-                "ProbeSet.Symbol as symbol, "
-                "ProbeSet.description as description, "
-                "ProbeSet.Chr as chr, "
-                "ProbeSet.Mb as mb, "
-                "ProbeSet.GenbankId as genbank_id, "
-                "ProbeSet.GeneId as gene_id, "
-                "ProbeSet.ChipId as chip_id, "
-                "GeneChip.Name as chip_name "
-                "FROM ProbeSet, GeneChip "
-                "WHERE ProbeSet.ChipId = GeneChip.Id and "
-                "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all()
-        print("qs2")
-        for result in results:
-            print(result)
-        assert len(set(result for result in results)) == 1, "Different results"
-    
-    print("results are:", results)
-    result = results[0]
-    result = row2dict(result)
-    try:
-        json_results = json.dumps(result, sort_keys=True)
-    except UnicodeDecodeError:
-        print("\n\nTrying to massage unicode\n\n")
-        #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__))
-        #resultd = dict(**result.__dict__)
-        for key, value in result.iteritems():
-            print("   key is:", key)
-            print("   value is:", value)
-            if isinstance(value, basestring):
-                result[key] = value.decode('utf-8', errors='ignore')
-        json_results = json.dumps(result, sort_keys=True)
-    
-    #print("json is: ", json_results)
-    
-    return json_results
+#def get_result_fields(trait_type, *args):
+#    if trait_type == "ProbeSet":
+#        print("qs1")
+#        results = Session.query(
+#                "name",
+#                "symbol",
+#                "description",
+#                "chr", "mb",
+#                "genbank_id",
+#                "gene_id",
+#                "chip_id",
+#                "chip_name"
+#            ).from_statement(
+#                "SELECT ProbeSet.Name as name, "
+#                "ProbeSet.Symbol as symbol, "
+#                "ProbeSet.description as description, "
+#                "ProbeSet.Chr as chr, "
+#                "ProbeSet.Mb as mb, "
+#                "ProbeSet.GenbankId as genbank_id, "
+#                "ProbeSet.GeneId as gene_id, "
+#                "ProbeSet.ChipId as chip_id, "
+#                "GeneChip.Name as chip_name "
+#                "FROM ProbeSet, GeneChip "
+#                "WHERE ProbeSet.ChipId = GeneChip.Id and "
+#                "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all()
+#        print("qs2")
+#        for result in results:
+#            print(result)
+#        assert len(set(result for result in results)) == 1, "Different results"
+#    
+#    print("results are:", results)
+#    result = results[0]
+#    result = row2dict(result)
+#    try:
+#        json_results = json.dumps(result, sort_keys=True)
+#    except UnicodeDecodeError:
+#        print("\n\nTrying to massage unicode\n\n")
+#        #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__))
+#        #resultd = dict(**result.__dict__)
+#        for key, value in result.iteritems():
+#            print("   key is:", key)
+#            print("   value is:", value)
+#            if isinstance(value, basestring):
+#                result[key] = value.decode('utf-8', errors='ignore')
+#        json_results = json.dumps(result, sort_keys=True)
+#    
+#    #print("json is: ", json_results)
+#    
+#    return json_results
 
 
 def row2dict(row):
author	Zachary Sloan	2013-03-28 21:11:37 +0000
committer	Zachary Sloan	2013-03-28 21:11:37 +0000
commit	38a2c6e46f467f7f2b7e82873ab8abfe61096f9b (patch)
tree	ad95e961035bb4dab0dbebc80a64b08b3b2f8fef /wqflask
parent	6952803ae8772cb8958aaf206689c61318bbd128 (diff)
download	genenetwork2-38a2c6e46f467f7f2b7e82873ab8abfe61096f9b.tar.gz