diff options
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- | wqflask/maintenance/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py | 94 | ||||
-rw-r--r-- | wqflask/maintenance/get_group_samplelists.py | 43 | ||||
l--------- | wqflask/maintenance/our_settings.py | 1 | ||||
-rw-r--r-- | wqflask/maintenance/quick_search_table.py | 62 |
5 files changed, 151 insertions, 49 deletions
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/wqflask/maintenance/__init__.py diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index 4c544192..ad560659 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -29,25 +29,47 @@ It needs to be run manually when database has been changed. from __future__ import print_function, division -import sys +#from flask import config +# +#cdict = {} +#config = config.Config(cdict).from_envvar('WQFLASK_SETTINGS') +#print("cdict is:", cdict) -sys.path.insert(0, "..") +import our_settings import MySQLdb import simplejson as json +import urlparse -from pprint import pformat as pf -from base import webqtlConfig +#import sqlalchemy as sa + +from pprint import pformat as pf +#Engine = sa.create_engine(our_settings.SQLALCHEMY_DATABASE_URI) # build MySql database connection -Con = MySQLdb.Connect(db=webqtlConfig.DB_NAME, - host=webqtlConfig.MYSQL_SERVER, - user=webqtlConfig.DB_USER, - passwd=webqtlConfig.DB_PASSWD) -Cursor = Con.cursor() + +#conn = Engine.connect() + + + + + +def parse_db_uri(db_uri): + """Converts a database URI to the db name, host name, user name, and password""" + + parsed_uri = urlparse.urlparse(our_settings.DB_URI) + + db_conn_info = dict( + db = parsed_uri.path[1:], + host = parsed_uri.hostname, + user = parsed_uri.username, + passwd = parsed_uri.password) + + return db_conn_info + def get_species(): @@ -77,12 +99,14 @@ def get_groups(species): def get_types(groups): """Build types list""" types = {} + print("Groups: ", pf(groups)) for species, group_dict in groups.iteritems(): types[species] = {} for group_name, _group_full_name in group_dict: # make group an alias to shorten the code types[species][group_name] = [("Phenotypes", "Phenotypes"), ("Genotypes", "Genotypes")] types[species][group_name] += build_types(species, group_name) + return types @@ -93,17 +117,32 @@ def build_types(species, group): (all types except phenotype/genotype are tissues) """ - Cursor.execute("""select distinct Tissue.Name, concat(Tissue.Name, ' mRNA') + + print("""select distinct Tissue.Name + from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species + where Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and + InbredSet.Name = '{}' and + ProbeFreeze.TissueId = Tissue.Id and + ProbeFreeze.InbredSetId = InbredSet.Id and + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and + ProbeSetFreeze.public > 0 + order by Tissue.Name""".format(species, group)) + Cursor.execute("""select distinct Tissue.Name from ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species where Species.Name = %s and Species.Id = InbredSet.SpeciesId and InbredSet.Name = %s and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = InbredSet.Id and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and - ProbeSetFreeze.public > %s - order by Tissue.Name""", (species, group, webqtlConfig.PUBLICTHRESH)) - return Cursor.fetchall() - + ProbeSetFreeze.public > 0 + order by Tissue.Name""", (species, group)) + + results = [] + for result in Cursor.fetchall(): + if len(result): + results.append((result[0], result[0])) + + return results def get_datasets(types): """Build datasets list""" @@ -111,9 +150,10 @@ def get_datasets(types): for species, group_dict in types.iteritems(): datasets[species] = {} for group, type_list in group_dict.iteritems(): + print("type_list: ", type_list) datasets[species][group] = {} - for type_name, _type_full_name in type_list: - datasets[species][group][type_name] = build_datasets(species, group, type_name) + for type_name in type_list: + datasets[species][group][type_name[0]] = build_datasets(species, group, type_name[0]) return datasets @@ -134,20 +174,30 @@ def build_datasets(species, group, type_name): if dataset_value: return [(dataset_value, dataset_text)] else: + print("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from + ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where + Species.Name = '{}' and Species.Id = InbredSet.SpeciesId and + InbredSet.Name = '{}' and + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = '{}' + and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = + InbredSet.Id and ProbeSetFreeze.public > 0 order by + ProbeSetFreeze.CreateTime desc""".format(species, group, type_name)) Cursor.execute("""select ProbeSetFreeze.Name, ProbeSetFreeze.FullName from ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where Species.Name = %s and Species.Id = InbredSet.SpeciesId and InbredSet.Name = %s and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and Tissue.Name = %s and ProbeFreeze.TissueId = Tissue.Id and ProbeFreeze.InbredSetId = - InbredSet.Id and ProbeSetFreeze.public > %s order by - ProbeSetFreeze.CreateTime desc""", ( - species, group, type_name, webqtlConfig.PUBLICTHRESH)) + InbredSet.Id and ProbeSetFreeze.public > 0 order by + ProbeSetFreeze.CreateTime desc""", (species, group, type_name)) return Cursor.fetchall() def main(): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" + + parse_db_uri(our_settings.DB_URI) + species = get_species() groups = get_groups(species) types = get_types(groups) @@ -167,6 +217,8 @@ def main(): datasets=datasets, ) + print("data:", data) + output_file = """../wqflask/static/new/javascript/dataset_menu_structure.json""" with open(output_file, 'w') as fh: @@ -182,5 +234,7 @@ def _test_it(): datasets = build_datasets("Mouse", "BXD", "Hippocampus") print("build_datasets:", pf(datasets)) -if __name__ == '__main__': +if __name__ == '__main__': + Conn = MySQLdb.Connect(**parse_db_uri(our_settings.DB_URI)) + Cursor = Conn.cursor() main() diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py new file mode 100644 index 00000000..c9ec3872 --- /dev/null +++ b/wqflask/maintenance/get_group_samplelists.py @@ -0,0 +1,43 @@ +from __future__ import absolute_import, print_function, division + +import os +import glob +import gzip + +from base import webqtlConfig + + +def process_genofiles(geno_dir=webqtlConfig.GENODIR): + print("Yabba") + #sys.exit("Dabba") + os.chdir(geno_dir) + for geno_file in glob.glob("*"): + if geno_file.lower().endswith(('.geno', '.geno.gz')): + #group_name = genofilename.split('.')[0] + sample_list = get_samplelist(geno_file) + + +def get_samplelist(geno_file): + genofilename = os.path.join(webqtlConfig.GENODIR, geno_file) + if os.path.isfile(genofilename + '.gz'): + genofilename += '.gz' + genofile = gzip.open(genofilename) + else: + genofile = open(genofilename) + + for line in genofile: + line = line.strip() + if not line: + continue + if line.startswith(("#", "@")): + continue + break + + headers = line.split() + + if headers[3] == "Mb": + samplelist = headers[4:] + else: + samplelist = headers[3:] + return samplelist + diff --git a/wqflask/maintenance/our_settings.py b/wqflask/maintenance/our_settings.py new file mode 120000 index 00000000..14efe407 --- /dev/null +++ b/wqflask/maintenance/our_settings.py @@ -0,0 +1 @@ +../../../zach_settings.py
\ No newline at end of file diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index b07e7656..9cd792ef 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -71,9 +71,10 @@ class PublishXRef(Base): values['the_key'] = json.dumps([pub_row.Id, pub_row.InbredSetId]) values['terms'] = cls.get_unique_terms(pub_row.Id, pub_row.InbredSetId) print("terms is:", values['terms']) - values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) - ins = QuickSearch.insert().values(**values) - conn.execute(ins) + if values['terms']: + values['result_fields'] = cls.get_result_fields(pub_row.Id, pub_row.InbredSetId) + ins = QuickSearch.insert().values(**values) + conn.execute(ins) counter += 1 print("Done:", counter) @@ -100,28 +101,30 @@ class PublishXRef(Base): inbredset_id=inbredset_id).all() unique = set() - for item in results[0]: - #print("locals:", locals()) - if not item: - continue - for token in item.split(): - if token.startswith(('(','[')): - token = token[1:] - if token.endswith((')', ']')): - token = token[:-1] - if token.endswith(';'): - token = token[:-1] - if len(token) > 2: - try: - # This hopefully ensures that the token is utf-8 - token = token.encode('utf-8') - print(" ->", token) - except UnicodeDecodeError: - print("\n-- UDE \n") - # Can't get it into utf-8, we won't use it - continue - - unique.add(token) + print("results: ", results) + if len(results): + for item in results[0]: + #print("locals:", locals()) + if not item: + continue + for token in item.split(): + if token.startswith(('(','[')): + token = token[1:] + if token.endswith((')', ']')): + token = token[:-1] + if token.endswith(';'): + token = token[:-1] + if len(token) > 2: + try: + # This hopefully ensures that the token is utf-8 + token = token.encode('utf-8') + print(" ->", token) + except UnicodeDecodeError: + print("\n-- UDE \n") + # Can't get it into utf-8, we won't use it + continue + + unique.add(token) #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @@ -467,8 +470,8 @@ QuickSearch = sa.Table("QuickSearch", Metadata, mysql_engine = 'MyISAM', ) -#QuickSearch.drop(Engine, checkfirst=True) -#Metadata.create_all(Engine) +QuickSearch.drop(Engine, checkfirst=True) +Metadata.create_all(Engine) def row2dict(row): @@ -495,9 +498,10 @@ def main(): Add all items from the ProbeSetXRef, GenoXRef, and PublishXRef tables to the QuickSearch tables. """ + + GenoXRef.run() + PublishXRef.run() ProbeSetXRef.run() - #GenoXRef.run() - #PublishXRef.run() if __name__ == "__main__": main()
\ No newline at end of file |