From 2b1a81ebf792d2c7d9c0bd572ebcf5bc881dfcab Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 11 Apr 2013 20:08:51 +0000
Subject: Fixed a bug in quick_search_table.py: skip probesets whose result query does not return exactly one distinct row
Edited generate_probesetfreeze_file.py to try running it on the dataset
that shows incorrect regression lines (Eye_AXBXA_1008_RankInv).
---
.../maintenance/generate_probesetfreeze_file.py | 24 ++++----
wqflask/maintenance/quick_search_table.py | 64 ++++++++++++----------
wqflask/wqflask/templates/show_trait.html | 2 +-
3 files changed, 49 insertions(+), 41 deletions(-)
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py
index 95515cea..91a2b8a1 100644
--- a/wqflask/maintenance/generate_probesetfreeze_file.py
+++ b/wqflask/maintenance/generate_probesetfreeze_file.py
@@ -36,12 +36,12 @@ def get_strains(cursor):
StrainXRef.InbredSetId = InbredSet.Id
and InbredSet.Name=%s;
""", "BXD")
-
+
strains = [strain[0] for strain in cursor.fetchall()]
print("strains:", pf(strains))
for strain in strains:
print(" -", strain)
-
+
return strains
def get_probeset_vals(cursor, dataset_name):
@@ -53,13 +53,13 @@ def get_probeset_vals(cursor, dataset_name):
ProbeSetFreeze.Name = %s and
ProbeSetXRef.ProbeSetId = ProbeSet.Id;
""", dataset_name)
-
+
probesets = cursor.fetchall()
-
+
print("Fetched probesets")
-
+
probeset_vals = collections.OrderedDict()
-
+
for counter, probeset in enumerate(probesets):
cursor.execute(""" select Strain.Name, ProbeSetData.value
from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain
@@ -73,19 +73,19 @@ def get_probeset_vals(cursor, dataset_name):
vals = cursor.fetchall()
for val in vals:
val_dic[val[0]] = val[1]
-
+
probeset_vals[probeset[1]] = val_dic
show_progress("Querying DB", counter)
-
+
return probeset_vals
def trim_strains(strains, probeset_vals):
trimmed_strains = []
#print("probeset_vals is:", pf(probeset_vals))
first_probeset = list(probeset_vals.itervalues())[0]
+ print("\n**** first_probeset is:", pf(first_probeset))
for strain in strains:
print("\n**** strain is:", pf(strain))
- print("\n**** first_probeset is:", pf(first_probeset))
if strain in first_probeset:
trimmed_strains.append(strain)
print("trimmed_strains:", pf(trimmed_strains))
@@ -107,9 +107,9 @@ def write_data_matrix_file(strains, probeset_vals, filename):
def main():
filename = os.path.expanduser("~/gene/wqflask/maintenance/" +
- "ProbeSetFreezeId_379_FullName_EPFL_LISP_BXD_CD_Muscle_Affy_Mouse_Gene_1.0_ST_" +
- "(Dec11)_RMA_**.txt")
- dataset_name = "EPFLMouseMuscleCDRMA1211"
+ "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" +
+ "(Oct08)_RankInv_Beta.txt")
+ dataset_name = "Eye_AXBXA_1008_RankInv"
cursor = get_cursor()
strains = get_strains(cursor)
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 75bc7d00..d175e600 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -320,6 +320,8 @@ class ProbeSetXRef(Base):
print("terms is:", values['terms'])
#values['species'] = get_species("ProbeSet", ps.Id)
values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId)
+ if values['result_fields'] == None:
+ continue
ins = QuickSearch.insert().values(**values)
conn.execute(ins)
counter += 1
@@ -339,36 +341,39 @@ class ProbeSetXRef(Base):
"ProbeSet.alias as alias "
"FROM ProbeSet "
"WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all()
-
+
unique = set()
- for item in results[0]:
- #print("locals:", locals())
- if not item:
- continue
- for token in item.split():
- if token.startswith(('(','[')):
- token = token[1:]
- if token.endswith((')', ']')):
- token = token[:-1]
- if token.endswith(';'):
- token = token[:-1]
- if len(token) > 2:
- try:
- # This hopefully ensures that the token is utf-8
- token = token.encode('utf-8')
- print(" ->", token)
- except UnicodeDecodeError:
- print("\n-- UDE \n")
- # Can't get it into utf-8, we won't use it
- continue
-
- unique.add(token)
- print("\nUnique terms are: {}\n".format(unique))
- return " ".join(unique)
+ if len(results):
+ for item in results[0]:
+ #print("locals:", locals())
+ if not item:
+ continue
+ for token in item.split():
+ if token.startswith(('(','[')):
+ token = token[1:]
+ if token.endswith((')', ']')):
+ token = token[:-1]
+ if token.endswith(';'):
+ token = token[:-1]
+ if len(token) > 2:
+ try:
+ # This hopefully ensures that the token is utf-8
+ token = token.encode('utf-8')
+ print(" ->", token)
+ except UnicodeDecodeError:
+ print("\n-- UDE \n")
+ # Can't get it into utf-8, we won't use it
+ continue
+
+ unique.add(token)
+ print("\nUnique terms are: {}\n".format(unique))
+ return " ".join(unique)
@staticmethod
def get_result_fields(probeset_id, dataset_id):
+ print("probeset_id: ", probeset_id)
+ print("dataset_id: ", dataset_id)
results = Session.query(
"name",
"species",
@@ -416,8 +421,11 @@ class ProbeSetXRef(Base):
"InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id,
dataset_id=dataset_id).all()
for result in results:
- print(result)
- assert len(set(result for result in results)) == 1, "Different results"
+ print("-", result)
+
+ if len(set(result for result in results)) != 1:
+ return None
+ #assert len(set(result for result in results)) == 1, "Different results"
print("results are:", results)
result = results[0]
@@ -468,8 +476,8 @@ def page_query(q):
def main():
- GenoXRef.run()
ProbeSetXRef.run()
+ GenoXRef.run()
PublishXRef.run()
if __name__ == "__main__":
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index d8f44d53..56887d5c 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -52,7 +52,7 @@
- -->
+
--
cgit v1.2.3