about | summary | refs | log | tree | commit | diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
author Lei Yan 2013-09-13 14:07:27 -0500
committer Lei Yan 2013-09-13 14:07:27 -0500
commit af24c0d610d9a2189f86677e4f23deb372ee2bf7 (patch)
tree 53480351b97727670637a37dbd4c78e52446ae88 /wqflask/maintenance
parent 155e2997613c0750de30b734686f8977524956f9 (diff)
parent c5fc931621707865357ace4b637db7481e0be552 (diff)
download genenetwork2-af24c0d610d9a2189f86677e4f23deb372ee2bf7.tar.gz
Merge https://github.com/zsloan/genenetwork
Resolved conflicts:
    wqflask/base/trait.py
    wqflask/wqflask/correlation/correlationFunction.py
    wqflask/wqflask/correlation/correlation_function.py
    wqflask/wqflask/correlation/correlation_functions.py
    wqflask/wqflask/correlation/show_corr_results.py
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- wqflask/maintenance/__init__.py 0
-rw-r--r-- wqflask/maintenance/gen_select_dataset.py 94
-rw-r--r-- wqflask/maintenance/get_group_samplelists.py 43
l--------- wqflask/maintenance/our_settings.py 1
-rw-r--r-- wqflask/maintenance/quick_search_table.py 62
5 files changed, 151 insertions, 49 deletions
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/maintenance/__init__.py
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 4c544192..ad560659 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -29,25 +29,47 @@ It needs to be run manually when database has been changed.
from __future__ import print_function, division
-import sys
+#from flask import config
+#
+#cdict = {}
+#config = config.Config(cdict).from_envvar('WQFLASK_SETTINGS')
+#print("cdict is:", cdict)
-sys.path.insert(0, "..")
+import our_settings
import MySQLdb
import simplejson as json
+import urlparse
-from pprint import pformat as pf
-from base import webqtlConfig
+#import sqlalchemy as sa
+
+from pprint import pformat as pf
+#Engine = sa.create_engine(our_settings.SQLALCHEMY_DATABASE_URI)
# build MySql database connection
-Con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
- host=webqtlConfig.MYSQL_SERVER,
- user=webqtlConfig.DB_USER,
- passwd=webqtlConfig.DB_PASSWD)
-Cursor = Con.cursor()
+
+#conn = Engine.connect()
+
+
+
+
+
+def parse_db_uri(db_uri):
+ """Converts a database URI to the db name, host name, user name, and password"""
+
+ parsed_uri = urlparse.urlparse(our_settings.DB_URI)
+
+ db_conn_info = dict(
+ db = parsed_uri.path[1:],
+ host = parsed_uri.hostname,
+ user = parsed_uri.username,
+ passwd = parsed_uri.password)
+
+ return db_conn_info
+
def get_species():
@@ -77,12 +99,14 @@ def get_groups(species):
def get_types(groups):
"""Build types list"""
types = {}
+ print("Groups: ", pf(groups))
for species, group_dict in groups.iteritems():
types[species] = {}
for group_name, _group_full_name in group_dict:
# make group an alias to shorten the code
types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")]
types[species][group_name] += build_types(species, group_name)
+
return types
@@ -93,17 +117,32 @@ def build_types(species, group):
(all types except phenotype/genotype are tissues)
"""
- Cursor.execute("""select distinct Tissue.Name, concat(Tissue.Name, ' mRNA')
+
+ print("""select distinct Tissue.Name
+ from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
+ where Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and
+ InbredSet.Name = '{}' and
+ ProbeFreeze.TissueId = Tissue.Id and
+ ProbeFreeze.InbredSetId = InbredSet.Id and
+ ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
+ ProbeSetFreeze.public > 0
+ order by Tissue.Name""".format(species, group))
+ Cursor.execute("""select distinct Tissue.Name
from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species
where Species.Name = %s and Species.Id = InbredSet.SpeciesId and
InbredSet.Name = %s and
ProbeFreeze.TissueId = Tissue.Id and
ProbeFreeze.InbredSetId = InbredSet.Id and
ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and
- ProbeSetFreeze.public > %s
- order by Tissue.Name""", (species, group, webqtlConfig.PUBLICTHRESH))
- return Cursor.fetchall()
-
+ ProbeSetFreeze.public > 0
+ order by Tissue.Name""", (species, group))
+
+ results = []
+ for result in Cursor.fetchall():
+ if len(result):
+ results.append((result[0], result[0]))
+
+ return results
def get_datasets(types):
"""Build datasets list"""
@@ -111,9 +150,10 @@ def get_datasets(types):
for species, group_dict in types.iteritems():
datasets[species] = {}
for group, type_list in group_dict.iteritems():
+ print("type_list: ", type_list)
datasets[species][group] = {}
- for type_name, _type_full_name in type_list:
- datasets[species][group][type_name] = build_datasets(species, group, type_name)
+ for type_name in type_list:
+ datasets[species][group][type_name[0]] = build_datasets(species, group, type_name[0])
return datasets
@@ -134,20 +174,30 @@ def build_datasets(species, group, type_name):
if dataset_value:
return [(dataset_value, dataset_text)]
else:
+ print("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
+ ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
+ Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and
+ InbredSet.Name = '{}' and
+ ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '{}'
+ and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId =
+ InbredSet.Id and ProbeSetFreeze.public > 0 order by
+ ProbeSetFreeze.CreateTime desc""".format(species, group, type_name))
Cursor.execute("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
Species.Name = %s and Species.Id = InbredSet.SpeciesId and
InbredSet.Name = %s and
ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = %s
and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId =
- InbredSet.Id and ProbeSetFreeze.public > %s order by
- ProbeSetFreeze.CreateTime desc""", (
- species, group, type_name, webqtlConfig.PUBLICTHRESH))
+ InbredSet.Id and ProbeSetFreeze.public > 0 order by
+ ProbeSetFreeze.CreateTime desc""", (species, group, type_name))
return Cursor.fetchall()
def main():
"""Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
+
+ parse_db_uri(our_settings.DB_URI)
+
species = get_species()
groups = get_groups(species)
types = get_types(groups)
@@ -167,6 +217,8 @@ def main():
datasets=datasets,
)
+ print("data:", data)
+
output_file = """../wqflask/static/new/javascript/dataset_menu_structure.json"""
with open(output_file, 'w') as fh:
@@ -182,5 +234,7 @@ def _test_it():
datasets = build_datasets("Mouse", "BXD", "Hippocampus")
print("build_datasets:", pf(datasets))
-if __name__ == '__main__':
+if __name__ == '__main__':
+ Conn = MySQLdb.Connect(**parse_db_uri(our_settings.DB_URI))
+ Cursor = Conn.cursor()
main()
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
new file mode 100644
index 00000000..c9ec3872
--- /dev/null
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -0,0 +1,43 @@
+from __future__ import absolute_import, print_function, division
+
+import os
+import glob
+import gzip
+
+from base import webqtlConfig
+
+
+def process_genofiles(geno_dir=webqtlConfig.GENODIR):
+ print("Yabba")
+ #sys.exit("Dabba")
+ os.chdir(geno_dir)
+ for geno_file in glob.glob("*"):
+ if geno_file.lower().endswith(('.geno', '.geno.gz')):
+ #group_name = genofilename.split('.')[0]
+ sample_list = get_samplelist(geno_file)
+
+
+def get_samplelist(geno_file):
+ genofilename = os.path.join(webqtlConfig.GENODIR, geno_file)
+ if os.path.isfile(genofilename + '.gz'):
+ genofilename += '.gz'
+ genofile = gzip.open(genofilename)
+ else:
+ genofile = open(genofilename)
+
+ for line in genofile:
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith(("#", "@")):
+ continue
+ break
+
+ headers = line.split()
+
+ if headers[3] == "Mb":
+ samplelist = headers[4:]
+ else:
+ samplelist = headers[3:]
+ return samplelist
+
diff --git a/wqflask/maintenance/our_settings.py b/wqflask/maintenance/our_settings.py
new file mode 120000
index 00000000..14efe407
--- /dev/null
+++ b/wqflask/maintenance/our_settings.py
@@ -0,0 +1 @@
+../../../zach_settings.py
\ No newline at end of file
diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py
index b07e7656..9cd792ef 100644
--- a/wqflask/maintenance/quick_search_table.py
+++ b/wqflask/maintenance/quick_search_table.py
@@ -71,9 +71,10 @@ class PublishXRef(Base):
values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId])
values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId)
print("terms is:", values['terms'])
- values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId)
- ins = QuickSearch.insert().values(**values)
- conn.execute(ins)
+ if values['terms']:
+ values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId)
+ ins = QuickSearch.insert().values(**values)
+ conn.execute(ins)
counter += 1
print("Done:", counter)
@@ -100,28 +101,30 @@ class PublishXRef(Base):
inbredset_id=inbredset_id).all()
unique = set()
- for item in results[0]:
- #print("locals:", locals())
- if not item:
- continue
- for token in item.split():
- if token.startswith(('(','[')):
- token = token[1:]
- if token.endswith((')', ']')):
- token = token[:-1]
- if token.endswith(';'):
- token = token[:-1]
- if len(token) > 2:
- try:
- # This hopefully ensures that the token is utf-8
- token = token.encode('utf-8')
- print(" ->", token)
- except UnicodeDecodeError:
- print("\n-- UDE \n")
- # Can't get it into utf-8, we won't use it
- continue
-
- unique.add(token)
+ print("results: ", results)
+ if len(results):
+ for item in results[0]:
+ #print("locals:", locals())
+ if not item:
+ continue
+ for token in item.split():
+ if token.startswith(('(','[')):
+ token = token[1:]
+ if token.endswith((')', ']')):
+ token = token[:-1]
+ if token.endswith(';'):
+ token = token[:-1]
+ if len(token) > 2:
+ try:
+ # This hopefully ensures that the token is utf-8
+ token = token.encode('utf-8')
+ print(" ->", token)
+ except UnicodeDecodeError:
+ print("\n-- UDE \n")
+ # Can't get it into utf-8, we won't use it
+ continue
+
+ unique.add(token)
#print("\nUnique terms are: {}\n".format(unique))
return " ".join(unique)
@@ -467,8 +470,8 @@ QuickSearch = sa.Table("QuickSearch", Metadata,
mysql_engine = 'MyISAM',
)
-#QuickSearch.drop(Engine, checkfirst=True)
-#Metadata.create_all(Engine)
+QuickSearch.drop(Engine, checkfirst=True)
+Metadata.create_all(Engine)
def row2dict(row):
@@ -495,9 +498,10 @@ def main():
Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables.
"""
+
+ GenoXRef.run()
+ PublishXRef.run()
ProbeSetXRef.run()
- #GenoXRef.run()
- #PublishXRef.run()
if __name__ == "__main__":
main()
\ No newline at end of file