From 2b1a81ebf792d2c7d9c0bd572ebcf5bc881dfcab Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 11 Apr 2013 20:08:51 +0000 Subject: Fixed a bug with quick_search_table.py Edited generate_probesetfreeze_file.py to try running it on the dataset with incorrect regression lines --- .../maintenance/generate_probesetfreeze_file.py | 24 ++++---- wqflask/maintenance/quick_search_table.py | 64 ++++++++++++---------- wqflask/wqflask/templates/show_trait.html | 2 +- 3 files changed, 49 insertions(+), 41 deletions(-) (limited to 'wqflask') diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py index 95515cea..91a2b8a1 100644 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -36,12 +36,12 @@ def get_strains(cursor): StrainXRef.InbredSetId = InbredSet.Id and InbredSet.Name=%s; """, "BXD") - + strains = [strain[0] for strain in cursor.fetchall()] print("strains:", pf(strains)) for strain in strains: print(" -", strain) - + return strains def get_probeset_vals(cursor, dataset_name): @@ -53,13 +53,13 @@ def get_probeset_vals(cursor, dataset_name): ProbeSetFreeze.Name = %s and ProbeSetXRef.ProbeSetId = ProbeSet.Id; """, dataset_name) - + probesets = cursor.fetchall() - + print("Fetched probesets") - + probeset_vals = collections.OrderedDict() - + for counter, probeset in enumerate(probesets): cursor.execute(""" select Strain.Name, ProbeSetData.value from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain @@ -73,19 +73,19 @@ def get_probeset_vals(cursor, dataset_name): vals = cursor.fetchall() for val in vals: val_dic[val[0]] = val[1] - + probeset_vals[probeset[1]] = val_dic show_progress("Querying DB", counter) - + return probeset_vals def trim_strains(strains, probeset_vals): trimmed_strains = [] #print("probeset_vals is:", pf(probeset_vals)) first_probeset = list(probeset_vals.itervalues())[0] + print("\n**** first_probeset is:", pf(first_probeset)) for strain in strains: print("\n**** strain is:", pf(strain)) - print("\n**** first_probeset is:", pf(first_probeset)) if strain in first_probeset: trimmed_strains.append(strain) print("trimmed_strains:", pf(trimmed_strains)) @@ -107,9 +107,9 @@ def write_data_matrix_file(strains, probeset_vals, filename): def main(): filename = os.path.expanduser("~/gene/wqflask/maintenance/" + - "ProbeSetFreezeId_379_FullName_EPFL_LISP_BXD_CD_Muscle_Affy_Mouse_Gene_1.0_ST_" + - "(Dec11)_RMA_**.txt") - dataset_name = "EPFLMouseMuscleCDRMA1211" + "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + + "(Oct08)_RankInv_Beta.txt") + dataset_name = "Eye_AXBXA_1008_RankInv" cursor = get_cursor() strains = get_strains(cursor) diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 75bc7d00..d175e600 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -320,6 +320,8 @@ class ProbeSetXRef(Base): print("terms is:", values['terms']) #values['species'] = get_species("ProbeSet", ps.Id) values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId) + if values['result_fields'] == None: + continue ins = QuickSearch.insert().values(**values) conn.execute(ins) counter += 1 @@ -339,36 +341,39 @@ class ProbeSetXRef(Base): "ProbeSet.alias as alias " "FROM ProbeSet " "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all() - + unique = set() - for item in results[0]: - #print("locals:", locals()) - if not item: - continue - for token in item.split(): - if token.startswith(('(','[')): - token = token[1:] - if token.endswith((')', ']')): - token = token[:-1] - if token.endswith(';'): - token = token[:-1] - if len(token) > 2: - try: - # This hopefully ensures that the token is utf-8 - token = token.encode('utf-8') - print(" ->", token) - except UnicodeDecodeError: - print("\n-- UDE \n") - # Can't get it into utf-8, we won't use it - continue - - unique.add(token) - print("\nUnique terms are: {}\n".format(unique)) - return " ".join(unique) + if len(results): + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if token.startswith(('(','[')): + token = token[1:] + if token.endswith((')', ']')): + token = token[:-1] + if token.endswith(';'): + token = token[:-1] + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + unique.add(token) + print("\nUnique terms are: {}\n".format(unique)) + return " ".join(unique) @staticmethod def get_result_fields(probeset_id, dataset_id): + print("probeset_id: ", probeset_id) + print("dataset_id: ", dataset_id) results = Session.query( "name", "species", @@ -416,8 +421,11 @@ class ProbeSetXRef(Base): "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id, dataset_id=dataset_id).all() for result in results: - print(result) - assert len(set(result for result in results)) == 1, "Different results" + print("-", result) + + if len(set(result for result in results)) != 1: + return None + #assert len(set(result for result in results)) == 1, "Different results" print("results are:", results) result = results[0] @@ -468,8 +476,8 @@ def page_query(q): def main(): - GenoXRef.run() ProbeSetXRef.run() + GenoXRef.run() PublishXRef.run() if __name__ == "__main__": diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index d8f44d53..56887d5c 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -52,7 +52,7 @@ - --> + -- cgit v1.2.3