2 files changed, 48 insertions, 40 deletions
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py
index 95515cea..91a2b8a1 100644
--- a/wqflask/maintenance/generate_probesetfreeze_file.py
+++ b/wqflask/maintenance/generate_probesetfreeze_file.py
@@ -36,12 +36,12 @@ def get_strains(cursor):
                             StrainXRef.InbredSetId = InbredSet.Id
                             and InbredSet.Name=%s;
                 """, "BXD")
-    
+
     strains = [strain[0] for strain in cursor.fetchall()]
     print("strains:", pf(strains))
     for strain in strains:
         print(" -", strain)
-    
+
     return strains
 
 def get_probeset_vals(cursor, dataset_name):
@@ -53,13 +53,13 @@ def get_probeset_vals(cursor, dataset_name):
                       ProbeSetFreeze.Name = %s and
                       ProbeSetXRef.ProbeSetId = ProbeSet.Id;
             """, dataset_name)
-    
+
     probesets = cursor.fetchall()
-    
+
     print("Fetched probesets")
-    
+
     probeset_vals = collections.OrderedDict()
-    
+
     for counter, probeset in enumerate(probesets):
         cursor.execute(""" select Strain.Name, ProbeSetData.value
                        from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain
@@ -73,19 +73,19 @@ def get_probeset_vals(cursor, dataset_name):
         vals = cursor.fetchall()
         for val in vals:
             val_dic[val[0]] = val[1]
-        
+
         probeset_vals[probeset[1]] = val_dic
         show_progress("Querying DB", counter)
-        
+
     return probeset_vals
 
 def trim_strains(strains, probeset_vals):
     trimmed_strains = []
     #print("probeset_vals is:", pf(probeset_vals))
     first_probeset = list(probeset_vals.itervalues())[0]
+    print("\n**** first_probeset is:", pf(first_probeset))
     for strain in strains:
         print("\n**** strain is:", pf(strain))
-        print("\n**** first_probeset is:", pf(first_probeset))
         if strain in first_probeset:
             trimmed_strains.append(strain)
     print("trimmed_strains:", pf(trimmed_strains))
@@ -107,9 +107,9 @@ def write_data_matrix_file(strains, probeset_vals, filename):
 
 def main():
     filename = os.path.expanduser("~/gene/wqflask/maintenance/" +
-                "ProbeSetFreezeId_379_FullName_EPFL_LISP_BXD_CD_Muscle_Affy_Mouse_Gene_1.0_ST_" + 
-                "(Dec11)_RMA_**.txt")
-    dataset_name = "EPFLMouseMuscleCDRMA1211"
+                "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" + 
+                "(Oct08)_RankInv_Beta.txt")
+    dataset_name = "Eye_AXBXA_1008_RankInv"
 
     cursor = get_cursor()
     strains = get_strains(cursor)
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index 75bc7d00..d175e600 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -320,6 +320,8 @@ class ProbeSetXRef(Base):
             print("terms is:", values['terms'])
             #values['species'] = get_species("ProbeSet", ps.Id)
             values['result_fields'] = cls.get_result_fields(ps.ProbeSetId, ps.ProbeSetFreezeId)
+            if values['result_fields'] == None:
+                continue
             ins = QuickSearch.insert().values(**values)
             conn.execute(ins)
             counter += 1
@@ -339,36 +341,39 @@ class ProbeSetXRef(Base):
                 "ProbeSet.alias as alias "
                 "FROM ProbeSet "
                 "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=probeset_id).all()
-        
+
         unique = set()
-        for item in results[0]:
-            #print("locals:", locals())
-            if not item:
-                continue
-            for token in item.split():
-                if token.startswith(('(','[')):
-                    token = token[1:]
-                if token.endswith((')', ']')):
-                    token = token[:-1]
-                if token.endswith(';'):
-                    token = token[:-1]
-                if len(token) > 2:
-                    try:
-                        # This hopefully ensures that the token is utf-8
-                        token = token.encode('utf-8')
-                        print(" ->", token)
-                    except UnicodeDecodeError:
-                        print("\n-- UDE \n")
-                        # Can't get it into utf-8, we won't use it
-                        continue 
-                    
-                    unique.add(token)
-        print("\nUnique terms are: {}\n".format(unique))
-        return " ".join(unique)
+        if len(results):
+            for item in results[0]:
+                #print("locals:", locals())
+                if not item:
+                    continue
+                for token in item.split():
+                    if token.startswith(('(','[')):
+                        token = token[1:]
+                    if token.endswith((')', ']')):
+                        token = token[:-1]
+                    if token.endswith(';'):
+                        token = token[:-1]
+                    if len(token) > 2:
+                        try:
+                            # This hopefully ensures that the token is utf-8
+                            token = token.encode('utf-8')
+                            print(" ->", token)
+                        except UnicodeDecodeError:
+                            print("\n-- UDE \n")
+                            # Can't get it into utf-8, we won't use it
+                            continue 
+                        
+                        unique.add(token)
+            print("\nUnique terms are: {}\n".format(unique))
+            return " ".join(unique)
 
 
     @staticmethod
     def get_result_fields(probeset_id, dataset_id):
+        print("probeset_id: ", probeset_id)
+        print("dataset_id: ", dataset_id)
         results = Session.query(
                 "name",
                 "species",
@@ -416,8 +421,11 @@ class ProbeSetXRef(Base):
                 "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id,
                                                                     dataset_id=dataset_id).all()
         for result in results:
-            print(result)
-        assert len(set(result for result in results)) == 1, "Different results"
+            print("-", result)
+            
+        if len(set(result for result in results)) != 1:
+            return None
+        #assert len(set(result for result in results)) == 1, "Different results"
         
         print("results are:", results)
         result = results[0]
@@ -468,8 +476,8 @@ def page_query(q):
 
 
 def main():
-    GenoXRef.run()
     ProbeSetXRef.run()
+    GenoXRef.run()
     PublishXRef.run()
 
 if __name__ == "__main__":