diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/generate_probesetfreeze_file.py | 24 | ||||
-rw-r--r-- | wqflask/maintenance/quick_search_table.py | 64 |
2 files changed, 48 insertions, 40 deletions
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py index 95515cea..91a2b8a1 100644 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -36,12 +36,12 @@ def get_strains(cursor): StrainXRef.InbredSetId = InbredSet.Id and InbredSet.Name=%s; """, "BXD") - + strains = [strain[0] for strain in cursor.fetchall()] print("strains:", pf(strains)) for strain in strains: print(" -", strain) - + return strains def get_probeset_vals(cursor, dataset_name): @@ -53,13 +53,13 @@ def get_probeset_vals(cursor, dataset_name): ProbeSetFreeze.Name = %s and ProbeSetXRef.ProbeSetId = ProbeSet.Id; """, dataset_name) - + probesets = cursor.fetchall() - + print("Fetched probesets") - + probeset_vals = collections.OrderedDict() - + for counter, probeset in enumerate(probesets): cursor.execute(""" select Strain.Name, ProbeSetData.value from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain @@ -73,19 +73,19 @@ def get_probeset_vals(cursor, dataset_name): vals = cursor.fetchall() for val in vals: val_dic[val[0]] = val[1] - + probeset_vals[probeset[1]] = val_dic show_progress("Querying DB", counter) - + return probeset_vals def trim_strains(strains, probeset_vals): trimmed_strains = [] #print("probeset_vals is:", pf(probeset_vals)) first_probeset = list(probeset_vals.itervalues())[0] + print("\n**** first_probeset is:", pf(first_probeset)) for strain in strains: print("\n**** strain is:", pf(strain)) - print("\n**** first_probeset is:", pf(first_probeset)) if strain in first_probeset: trimmed_strains.append(strain) print("trimmed_strains:", pf(trimmed_strains)) @@ -107,9 +107,9 @@ def write_data_matrix_file(strains, probeset_vals, filename): def main(): filename = os.path.expanduser("~/gene/wqflask/maintenance/" + - "ProbeSetFreezeId_379_FullName_EPFL_LISP_BXD_CD_Muscle_Affy_Mouse_Gene_1.0_ST_" + - "(Dec11)_RMA_**.txt") - dataset_name = "EPFLMouseMuscleCDRMA1211" + "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + + "(Oct08)_RankInv_Beta.txt") + dataset_name = "Eye_AXBXA_1008_RankInv" cursor = get_cursor() strains = get_strains(cursor) diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index 75bc7d00..d175e600 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -320,6 +320,8 @@ class ProbeSetXRef(Base): print("terms is:", values['terms']) #values['species'] = get_species("ProbeSet", ps.Id) values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId) + if values['result_fields'] == None: + continue ins = QuickSearch.insert().values(**values) conn.execute(ins) counter += 1 @@ -339,36 +341,39 @@ class ProbeSetXRef(Base): "ProbeSet.alias as alias " "FROM ProbeSet " "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all() - + unique = set() - for item in results[0]: - #print("locals:", locals()) - if not item: - continue - for token in item.split(): - if token.startswith(('(','[')): - token = token[1:] - if token.endswith((')', ']')): - token = token[:-1] - if token.endswith(';'): - token = token[:-1] - if len(token) > 2: - try: - # This hopefully ensures that the token is utf-8 - token = token.encode('utf-8') - print(" ->", token) - except UnicodeDecodeError: - print("\n-- UDE \n") - # Can't get it into utf-8, we won't use it - continue - - unique.add(token) - print("\nUnique terms are: {}\n".format(unique)) - return " ".join(unique) + if len(results): + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if token.startswith(('(','[')): + token = token[1:] + if token.endswith((')', ']')): + token = token[:-1] + if token.endswith(';'): + token = token[:-1] + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + unique.add(token) + print("\nUnique terms are: {}\n".format(unique)) + return " ".join(unique) @staticmethod def get_result_fields(probeset_id, dataset_id): + print("probeset_id: ", probeset_id) + print("dataset_id: ", dataset_id) results = Session.query( "name", "species", @@ -416,8 +421,11 @@ class ProbeSetXRef(Base): "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id, dataset_id=dataset_id).all() for result in results: - print(result) - assert len(set(result for result in results)) == 1, "Different results" + print("-", result) + + if len(set(result for result in results)) != 1: + return None + #assert len(set(result for result in results)) == 1, "Different results" print("results are:", results) result = results[0] @@ -468,8 +476,8 @@ def page_query(q): def main(): - GenoXRef.run() ProbeSetXRef.run() + GenoXRef.run() PublishXRef.run() if __name__ == "__main__": |