about summary refs log tree commit diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
author Lei Yan 2013-09-13 14:07:27 -0500
committer Lei Yan 2013-09-13 14:07:27 -0500
commit af24c0d610d9a2189f86677e4f23deb372ee2bf7 (patch)
tree 53480351b97727670637a37dbd4c78e52446ae88 /wqflask/maintenance
parent 155e2997613c0750de30b734686f8977524956f9 (diff)
parent c5fc931621707865357ace4b637db7481e0be552 (diff)
download genenetwork2-af24c0d610d9a2189f86677e4f23deb372ee2bf7.tar.gz
Merge https://github.com/zsloan/genenetwork
Resolved conflicts:
	wqflask/base/trait.py
	wqflask/wqflask/correlation/correlationFunction.py
	wqflask/wqflask/correlation/correlation_function.py
	wqflask/wqflask/correlation/correlation_functions.py
	wqflask/wqflask/correlation/show_corr_results.py
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- wqflask/maintenance/__init__.py 0
-rw-r--r-- wqflask/maintenance/gen_select_dataset.py 94
-rw-r--r-- wqflask/maintenance/get_group_samplelists.py 43
l--------- wqflask/maintenance/our_settings.py 1
-rw-r--r-- wqflask/maintenance/quick_search_table.py 62
5 files changed, 151 insertions, 49 deletions
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/maintenance/__init__.py
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 4c544192..ad560659 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -29,25 +29,47 @@ It needs to be run manually when database has been changed.
 
 from __future__ import print_function, division
 
-import sys
+#from flask import config
+#
+#cdict = {}
+#config = config.Config(cdict).from_envvar('WQFLASK_SETTINGS')
+#print("cdict is:", cdict)
 
-sys.path.insert(0, "..")
+import our_settings
 
 import MySQLdb
 
 import simplejson as json
+import urlparse
 
-from pprint import pformat as pf
 
-from base import webqtlConfig
+#import sqlalchemy as sa
+
+from pprint import pformat as pf
 
+#Engine = sa.create_engine(our_settings.SQLALCHEMY_DATABASE_URI)
 
 # build MySql database connection
-Con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
-                      host=webqtlConfig.MYSQL_SERVER,
-                      user=webqtlConfig.DB_USER,
-                      passwd=webqtlConfig.DB_PASSWD)
-Cursor = Con.cursor()
+
+#conn = Engine.connect()
+
+
+
+
+
+def parse_db_uri(db_uri):
+    """Split a database URI into keyword arguments for MySQLdb.Connect.
+
+    Returns a dict with the keys db, host, user and passwd parsed from db_uri.
+    """
+    parsed_uri = urlparse.urlparse(db_uri)  # was our_settings.DB_URI, ignoring the argument
+    return dict(
+        # the URI path is "/<database name>"; drop the leading slash
+        db=parsed_uri.path[1:],
+        host=parsed_uri.hostname,
+        user=parsed_uri.username,
+        passwd=parsed_uri.password)
+
 
 
 def get_species():
@@ -77,12 +99,14 @@ def get_groups(species):
 def get_types(groups):
     """Build types list"""
     types = {}
+    print("Groups: ", pf(groups))
     for species, group_dict in groups.iteritems():
         types[species] = {}
         for group_name, _group_full_name in group_dict:
             # make group an alias to shorten the code
             types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")]
             types[species][group_name] += build_types(species, group_name)
+
     return types
 
 
@@ -93,17 +117,32 @@ def build_types(species, group):
     (all types except phenotype/genotype are tissues)
      
     """
-    Cursor.execute("""select distinct Tissue.Name, concat(Tissue.Name, ' mRNA')
+    
+    print("""select distinct Tissue.Name
+                       from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
+                       where Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and
+                       InbredSet.Name = '{}' and
+                       ProbeFreeze.TissueId = Tissue.Id and
+                       ProbeFreeze.InbredSetId = InbredSet.Id and
+                       ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
+                       ProbeSetFreeze.public > 0
+                       order by Tissue.Name""".format(species, group))
+    Cursor.execute("""select distinct Tissue.Name
                        from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
                        where Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                        InbredSet.Name = %s and
                        ProbeFreeze.TissueId = Tissue.Id and
                        ProbeFreeze.InbredSetId = InbredSet.Id and
                        ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
-                       ProbeSetFreeze.public > %s
-                       order by Tissue.Name""", (species, group, webqtlConfig.PUBLICTHRESH))
-    return Cursor.fetchall()
-
+                       ProbeSetFreeze.public > 0
+                       order by Tissue.Name""", (species, group))
+    
+    results = []
+    for result in Cursor.fetchall():
+        if len(result):
+            results.append((result[0], result[0]))
+    
+    return results
 
 def get_datasets(types):
     """Build datasets list"""
@@ -111,9 +150,10 @@ def get_datasets(types):
     for species, group_dict in types.iteritems():
         datasets[species] = {}
         for group, type_list in group_dict.iteritems():
+            print("type_list: ", type_list)
             datasets[species][group] = {}
-            for type_name, _type_full_name in type_list:
-                datasets[species][group][type_name] = build_datasets(species, group, type_name)
+            for type_name in type_list:
+                datasets[species][group][type_name[0]] = build_datasets(species, group, type_name[0])
     return datasets
 
 
@@ -134,20 +174,30 @@ def build_datasets(species, group, type_name):
     if dataset_value:
         return [(dataset_value, dataset_text)]
     else:
+        print("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
+                    ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
+                    Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and
+                    InbredSet.Name = '{}' and
+                    ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '{}'
+                    and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId =
+                    InbredSet.Id and ProbeSetFreeze.public > 0 order by
+                    ProbeSetFreeze.CreateTime desc""".format(species, group, type_name))
         Cursor.execute("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
                     ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
                     Species.Name = %s and Species.Id = InbredSet.SpeciesId and
                     InbredSet.Name = %s and
                     ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = %s
                     and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId =
-                    InbredSet.Id and ProbeSetFreeze.public > %s order by
-                    ProbeSetFreeze.CreateTime desc""", (
-                        species, group, type_name, webqtlConfig.PUBLICTHRESH))
+                    InbredSet.Id and ProbeSetFreeze.public > 0 order by
+                    ProbeSetFreeze.CreateTime desc""", (species, group, type_name))
         return Cursor.fetchall()
 
 
 def main():
     """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
+
+    parse_db_uri(our_settings.DB_URI)
+
     species = get_species()
     groups = get_groups(species)
     types = get_types(groups)
@@ -167,6 +217,8 @@ def main():
                 datasets=datasets,
                 )
 
+    print("data:", data)
+
     output_file = """../wqflask/static/new/javascript/dataset_menu_structure.json"""
 
     with open(output_file, 'w') as fh:
@@ -182,5 +234,7 @@ def _test_it():
     datasets = build_datasets("Mouse", "BXD", "Hippocampus")
     print("build_datasets:", pf(datasets))
 
-if __name__ == '__main__':
+if __name__ == '__main__':   
+    Conn = MySQLdb.Connect(**parse_db_uri(our_settings.DB_URI))
+    Cursor = Conn.cursor()
     main()
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
new file mode 100644
index 00000000..c9ec3872
--- /dev/null
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -0,0 +1,43 @@
+from __future__ import absolute_import, print_function, division
+
+import os
+import glob
+import gzip
+
+from base import webqtlConfig
+
+
+def process_genofiles(geno_dir=webqtlConfig.GENODIR):
+    """Read the sample list of each .geno / .geno.gz file found in geno_dir."""
+    print("Yabba")
+    os.chdir(geno_dir)  # the glob pattern below is relative to the cwd
+    geno_suffixes = ('.geno', '.geno.gz')
+    matches = [name for name in glob.glob("*") if name.lower().endswith(geno_suffixes)]
+    for name in matches:
+        sample_list = get_samplelist(name)  # result is not used yet
+
+
+def get_samplelist(geno_file):
+    """Return the sample names from the header row of a genotype file.
+
+    geno_file is looked up in webqtlConfig.GENODIR and may be plain or
+    gzipped. Returns [] when no header row is found."""
+    genofilename = os.path.join(webqtlConfig.GENODIR, geno_file)
+    if not genofilename.lower().endswith('.gz') and os.path.isfile(genofilename + '.gz'):
+        genofilename += '.gz'
+    # a name that already ends in .gz must be read through gzip as well
+    opener = gzip.open if genofilename.lower().endswith('.gz') else open
+    genofile = opener(genofilename)
+    headers = []
+    try:
+        for line in genofile:
+            line = line.strip()
+            # first line that is neither blank nor "#"/"@" metadata is the header
+            if line and not line.startswith(("#", "@")):
+                headers = line.split()
+                break
+    finally:
+        genofile.close()
+    # samples start at the fourth column, or the fifth when the fourth is "Mb"
+    return headers[4:] if headers[3:4] == ["Mb"] else headers[3:]
+
diff --git a/wqflask/maintenance/our_settings.py b/wqflask/maintenance/our_settings.py
new file mode 120000
index 00000000..14efe407
--- /dev/null
+++ b/wqflask/maintenance/our_settings.py
@@ -0,0 +1 @@
+../../../zach_settings.py
\ No newline at end of file
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index b07e7656..9cd792ef 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -71,9 +71,10 @@ class PublishXRef(Base):
             values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId])
             values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId)
             print("terms is:", values['terms'])
-            values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId)
-            ins = QuickSearch.insert().values(**values)
-            conn.execute(ins)
+            if values['terms']:
+                values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId)
+                ins = QuickSearch.insert().values(**values)
+                conn.execute(ins)
             counter += 1
             print("Done:", counter)
 
@@ -100,28 +101,30 @@ class PublishXRef(Base):
                                                             inbredset_id=inbredset_id).all()
 
         unique = set()
-        for item in results[0]:
-            #print("locals:", locals())
-            if not item:
-                continue
-            for token in item.split():
-                if token.startswith(('(','[')):
-                    token = token[1:]
-                if token.endswith((')', ']')):
-                    token = token[:-1]
-                if token.endswith(';'):
-                    token = token[:-1]
-                if len(token) > 2:
-                    try:
-                        # This hopefully ensures that the token is utf-8
-                        token = token.encode('utf-8')
-                        print(" ->", token)
-                    except UnicodeDecodeError:
-                        print("\n-- UDE \n")
-                        # Can't get it into utf-8, we won't use it
-                        continue 
-
-                    unique.add(token)
+        print("results: ", results)
+        if len(results):
+            for item in results[0]:
+                #print("locals:", locals())
+                if not item:
+                    continue
+                for token in item.split():
+                    if token.startswith(('(','[')):
+                        token = token[1:]
+                    if token.endswith((')', ']')):
+                        token = token[:-1]
+                    if token.endswith(';'):
+                        token = token[:-1]
+                    if len(token) > 2:
+                        try:
+                            # This hopefully ensures that the token is utf-8
+                            token = token.encode('utf-8')
+                            print(" ->", token)
+                        except UnicodeDecodeError:
+                            print("\n-- UDE \n")
+                            # Can't get it into utf-8, we won't use it
+                            continue 
+    
+                        unique.add(token)
         #print("\nUnique terms are: {}\n".format(unique))
         return " ".join(unique)            
 
@@ -467,8 +470,8 @@ QuickSearch = sa.Table("QuickSearch", Metadata,
         mysql_engine = 'MyISAM',
                     )
 
-#QuickSearch.drop(Engine, checkfirst=True)
-#Metadata.create_all(Engine)
+QuickSearch.drop(Engine, checkfirst=True)
+Metadata.create_all(Engine)
 
 
 def row2dict(row):
@@ -495,9 +498,10 @@ def main():
     Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables.
     
     """
+
+    GenoXRef.run()
+    PublishXRef.run()
     ProbeSetXRef.run()
-    #GenoXRef.run()
-    #PublishXRef.run()
 
 if __name__ == "__main__":
     main()
\ No newline at end of file