diff options
-rw-r--r-- | wqflask/maintenance/quick_search_table.py | 291 |
1 files changed, 227 insertions, 64 deletions
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 132d1b72..60446f79 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -42,10 +42,170 @@ Metadata = sa.MetaData() Metadata.bind = Engine -class ProbeSet(Base): +class ProbeSetXRef(Base): __tablename__ = 'ProbeSetXRef' - __table_args__ = {'autoload': True} + ProbeSetFreezeId = sa.Column(sa.Integer, primary_key=True) + ProbeSetId = sa.Column(sa.Integer, primary_key=True) + DataId = sa.Column(sa.Integer, unique=True) + Locus_old = sa.Column(sa.Text) + LRS_old = sa.Column(sa.Float) + pValue_old = sa.Column(sa.Float) + mean = sa.Column(sa.Float) + se = sa.Column(sa.Float) + Locus = sa.Column(sa.Text) + LRS = sa.Column(sa.Float) + pValue = sa.Column(sa.Float) + additive = sa.Column(sa.Float) + h2 = sa.Column(sa.Float) + + #__mapper_args__ = {'primary_key':[ProbeSetXRef.ProbeSetId, ProbeSetXRef.ProbeSetFreezeId]} + + @classmethod + def run(cls): + conn = Engine.connect() + counter = 0 + for ps in page_query(Session.query(cls)): #all() + values = {} + values['table_name'] = cls.__tablename__ + values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId]) + values['terms'] = cls.get_unique_terms(ps.ProbeSetId) + print("terms is:", values['terms']) + #values['species'] = get_species("ProbeSet", ps.Id) + values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId) + ins = QuickSearch.insert().values(**values) + conn.execute(ins) + counter += 1 + print("Done:", counter) + + @staticmethod + def get_unique_terms(probeset_id): + results = Session.query( + "name", + "symbol", + "description", + "alias" + ).from_statement( + "SELECT ProbeSet.Name as name, " + "ProbeSet.Symbol as symbol, " + "ProbeSet.description as description, " + "ProbeSet.alias as alias " + "FROM ProbeSet " + "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all() + + unique = set() + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if token.startswith(('(','[')): + token = token[1:] + if token.endswith((')', ']')): + token = token[:-1] + if token.endswith(';'): + token = token[:-1] + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + unique.add(token) + print("\nUnique terms are: {}\n".format(unique)) + return " ".join(unique) + + #def get_species(dataset_id): + # print("Before species query") + # results = Session.query("Name").from_statement("SELECT Species.Name " + # "FROM ProbeSetXRef, " + # "ProbeSetFreeze, " + # "ProbeFreeze, " + # "InbredSet, " + # "Species " + # "WHERE ProbeSetFreeze.Id =:probeset_freeze_id and " + # "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and " + # "ProbeFreeze.InbredSetId = InbredSet.Id and " + # "InbredSet.SpeciesId = Species.Id").params(probeset_freeze_id=dataset_id).all() + # print("After query") + # + # assert len(set([result.Name for result in results])) == 1, "Multiple names?" + # + # print("species is:", results[0].Name) + # + # return results[0].Name + + @staticmethod + def get_result_fields(probeset_id, dataset_id): + results = Session.query( + "name", + "species", + "dataset", + "dataset_name", + "symbol", + "description", + "chr", "mb", + "lrs", + "genbank_id", + "gene_id", + "chip_id", + "chip_name" + ).from_statement( + "SELECT ProbeSet.Name as name, " + "Species.Name as species, " + "ProbeSetFreeze.Name as dataset, " + "ProbeSetFreeze.FullName as dataset_name, " + "ProbeSet.Symbol as symbol, " + "ProbeSet.description as description, " + "ProbeSet.Chr as chr, " + "ProbeSet.Mb as mb, " + "ProbeSetXRef.LRS as lrs, " + "ProbeSet.GenbankId as genbank_id, " + "ProbeSet.GeneId as gene_id, " + "ProbeSet.ChipId as chip_id, " + "GeneChip.Name as chip_name " + "FROM ProbeSet, " + "ProbeSetXRef, " + "ProbeSetFreeze, " + "ProbeFreeze, " + "InbredSet, " + "Species, " + "GeneChip " + "WHERE ProbeSetXRef.ProbeSetId = :probeset_id and " + "ProbeSetXRef.ProbeSetFreezeId = :dataset_id and " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id and " + "ProbeSet.ChipId = GeneChip.Id and " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and " + "ProbeFreeze.InbredSetId = InbredSet.Id and " + "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id, + dataset_id=dataset_id).all() + for result in results: + print(result) + assert len(set(result for result in results)) == 1, "Different results" + + print("results are:", results) + result = results[0] + result = row2dict(result) + try: + json_results = json.dumps(result, sort_keys=True) + except UnicodeDecodeError: + print("\n\nTrying to massage unicode\n\n") + for key, value in result.iteritems(): + print("\tkey is:", key) + print("\tvalue is:", value) + if isinstance(value, basestring): + result[key] = value.decode('utf-8', errors='ignore') + json_results = json.dumps(result, sort_keys=True) + + #print("json is: ", json_results) + + return json_results + QuickSearch = sa.Table("QuickSearch", Metadata, sa.Column('table_name', sa.String(15), primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from @@ -77,7 +237,7 @@ def get_unique_terms(trait_type, trait_id): #if not args: # return None - if trait_type="ProbeSet": + if trait_type=="ProbeSet": results = Session.query( "name", "symbol", @@ -120,18 +280,21 @@ def get_unique_terms(trait_type, trait_id): def main(): conn = Engine.connect() counter = 0 - for ps in page_query(Session.query(ProbeSet)): #all() - values = {} - values['table_name'] = "ProbeSetXRef" - values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId]) - values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId) - print("terms is:", values['terms']) - #values['species'] = get_species("ProbeSet", ps.Id) - values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId) - ins = QuickSearch.insert().values(**values) - conn.execute(ins) - counter += 1 - print("Done:", counter) + + ProbeSetXRef.run() + + #for ps in page_query(Session.query(ProbeSet)): #all() + # values = {} + # values['table_name'] = "ProbeSetXRef" + # values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId]) + # values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId) + # print("terms is:", values['terms']) + # #values['species'] = get_species("ProbeSet", ps.Id) + # values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId) + # ins = QuickSearch.insert().values(**values) + # conn.execute(ins) + # counter += 1 + # print("Done:", counter) def get_species(trait_type, trait_id): @@ -156,55 +319,55 @@ def get_species(trait_type, trait_id): return results[0].Name -def get_result_fields(trait_type, *args): - if trait_type == "ProbeSet": - print("qs1") - results = Session.query( - "name", - "symbol", - "description", - "chr", "mb", - "genbank_id", - "gene_id", - "chip_id", - "chip_name" - ).from_statement( - "SELECT ProbeSet.Name as name, " - "ProbeSet.Symbol as symbol, " - "ProbeSet.description as description, " - "ProbeSet.Chr as chr, " - "ProbeSet.Mb as mb, " - "ProbeSet.GenbankId as genbank_id, " - "ProbeSet.GeneId as gene_id, " - "ProbeSet.ChipId as chip_id, " - "GeneChip.Name as chip_name " - "FROM ProbeSet, GeneChip " - "WHERE ProbeSet.ChipId = GeneChip.Id and " - "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all() - print("qs2") - for result in results: - print(result) - assert len(set(result for result in results)) == 1, "Different results" - - print("results are:", results) - result = results[0] - result = row2dict(result) - try: - json_results = json.dumps(result, sort_keys=True) - except UnicodeDecodeError: - print("\n\nTrying to massage unicode\n\n") - #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__)) - #resultd = dict(**result.__dict__) - for key, value in result.iteritems(): - print(" key is:", key) - print(" value is:", value) - if isinstance(value, basestring): - result[key] = value.decode('utf-8', errors='ignore') - json_results = json.dumps(result, sort_keys=True) - - #print("json is: ", json_results) - - return json_results +#def get_result_fields(trait_type, *args): +# if trait_type == "ProbeSet": +# print("qs1") +# results = Session.query( +# "name", +# "symbol", +# "description", +# "chr", "mb", +# "genbank_id", +# "gene_id", +# "chip_id", +# "chip_name" +# ).from_statement( +# "SELECT ProbeSet.Name as name, " +# "ProbeSet.Symbol as symbol, " +# "ProbeSet.description as description, " +# "ProbeSet.Chr as chr, " +# "ProbeSet.Mb as mb, " +# "ProbeSet.GenbankId as genbank_id, " +# "ProbeSet.GeneId as gene_id, " +# "ProbeSet.ChipId as chip_id, " +# "GeneChip.Name as chip_name " +# "FROM ProbeSet, GeneChip " +# "WHERE ProbeSet.ChipId = GeneChip.Id and " +# "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all() +# print("qs2") +# for result in results: +# print(result) +# assert len(set(result for result in results)) == 1, "Different results" +# +# print("results are:", results) +# result = results[0] +# result = row2dict(result) +# try: +# json_results = json.dumps(result, sort_keys=True) +# except UnicodeDecodeError: +# print("\n\nTrying to massage unicode\n\n") +# #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__)) +# #resultd = dict(**result.__dict__) +# for key, value in result.iteritems(): +# print(" key is:", key) +# print(" value is:", value) +# if isinstance(value, basestring): +# result[key] = value.decode('utf-8', errors='ignore') +# json_results = json.dumps(result, sort_keys=True) +# +# #print("json is: ", json_results) +# +# return json_results def row2dict(row): |