about | summary | refs | log | tree | commit | diff
path: root/wqflask/maintenance/gen_select_dataset.py
diff options
context:
space:
mode:
author: Arthur Centeno, 2021-10-25 21:04:23 +0000
committer: Arthur Centeno, 2021-10-25 21:04:23 +0000
commit: 499a80f138030c4de1629c043c8f9401a99894ea (patch)
tree: 449dcae965d13f966fb6d52625fbc86661c8c6a0 /wqflask/maintenance/gen_select_dataset.py
parent: 6151faa9ea67af4bf4ea95fb681a9dc4319474b6 (diff)
parent: 700802303e5e8221a9d591ba985d6607aa61e1ce (diff)
download: genenetwork2-499a80f138030c4de1629c043c8f9401a99894ea.tar.gz
Merge github.com:genenetwork/genenetwork2 into acenteno
Diffstat (limited to 'wqflask/maintenance/gen_select_dataset.py')
-rw-r--r-- wqflask/maintenance/gen_select_dataset.py 49
1 files changed, 24 insertions, 25 deletions
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 647e58a2..db65a11f 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -30,18 +30,10 @@ It needs to be run manually when database has been changed. Run it as
#
# This module is used by GeneNetwork project (www.genenetwork.org)
-from __future__ import print_function, division
-
-#from flask import config
-#
-#cdict = {}
-#config = config.Config(cdict).from_envvar('WQFLASK_SETTINGS')
-#print("cdict is:", cdict)
-
import sys
# NEW: Note we prepend the current path - otherwise a guix instance of GN2 may be used instead
-sys.path.insert(0,'./')
+sys.path.insert(0, './')
# NEW: import app to avoid a circular dependency on utility.tools
from wqflask import app
@@ -50,7 +42,7 @@ from utility.tools import locate, locate_ignore_error, TEMPDIR, SQL_URI
import MySQLdb
import simplejson as json
-import urlparse
+import urllib.parse
#import sqlalchemy as sa
@@ -63,16 +55,17 @@ from pprint import pformat as pf
#conn = Engine.connect()
+
def parse_db_uri():
"""Converts a database URI to the db name, host name, user name, and password"""
- parsed_uri = urlparse.urlparse(SQL_URI)
+ parsed_uri = urllib.parse.urlparse(SQL_URI)
db_conn_info = dict(
- db = parsed_uri.path[1:],
- host = parsed_uri.hostname,
- user = parsed_uri.username,
- passwd = parsed_uri.password)
+ db=parsed_uri.path[1:],
+ host=parsed_uri.hostname,
+ user=parsed_uri.username,
+ passwd=parsed_uri.password)
print(db_conn_info)
return db_conn_info
@@ -108,7 +101,7 @@ def get_types(groups):
"""Build types list"""
types = {}
#print("Groups: ", pf(groups))
- for species, group_dict in groups.iteritems():
+ for species, group_dict in list(groups.items()):
types[species] = {}
for group_name, _group_full_name in group_dict:
# make group an alias to shorten the code
@@ -127,21 +120,23 @@ def get_types(groups):
else:
if not phenotypes_exist(group_name) and not genotypes_exist(group_name):
types[species].pop(group_name, None)
- groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
- else: #ZS: This whole else statement might be unnecessary, need to check
+ groups[species] = tuple(
+ group for group in groups[species] if group[0] != group_name)
+ else: # ZS: This whole else statement might be unnecessary, need to check
types_list = build_types(species, group_name)
if len(types_list) > 0:
types[species][group_name] = types_list
else:
types[species].pop(group_name, None)
- groups[species] = tuple(group for group in groups[species] if group[0] != group_name)
+ groups[species] = tuple(
+ group for group in groups[species] if group[0] != group_name)
return types
def phenotypes_exist(group_name):
#print("group_name:", group_name)
Cursor.execute("""select Name from PublishFreeze
- where PublishFreeze.Name = '%s'""" % (group_name+"Publish"))
+ where PublishFreeze.Name = '%s'""" % (group_name + "Publish"))
results = Cursor.fetchone()
#print("RESULTS:", results)
@@ -151,10 +146,11 @@ def phenotypes_exist(group_name):
else:
return False
+
def genotypes_exist(group_name):
#print("group_name:", group_name)
Cursor.execute("""select Name from GenoFreeze
- where GenoFreeze.Name = '%s'""" % (group_name+"Geno"))
+ where GenoFreeze.Name = '%s'""" % (group_name + "Geno"))
results = Cursor.fetchone()
#print("RESULTS:", results)
@@ -164,6 +160,7 @@ def genotypes_exist(group_name):
else:
return False
+
def build_types(species, group):
"""Fetches tissues
@@ -192,12 +189,13 @@ def build_types(species, group):
return results
+
def get_datasets(types):
"""Build datasets list"""
datasets = {}
- for species, group_dict in types.iteritems():
+ for species, group_dict in list(types.items()):
datasets[species] = {}
- for group, type_list in group_dict.iteritems():
+ for group, type_list in list(group_dict.items()):
datasets[species][group] = {}
for type_name in type_list:
these_datasets = build_datasets(species, group, type_name[0])
@@ -254,7 +252,7 @@ def build_datasets(species, group, type_name):
dataset_text = "%s Genotypes" % group
datasets.append((dataset_id, dataset_value, dataset_text))
- else: # for mRNA expression/ProbeSet
+ else: # for mRNA expression/ProbeSet
Cursor.execute("""select ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName from
ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue, Species where
Species.Name = '%s' and Species.Id = InbredSet.SpeciesId and
@@ -316,7 +314,8 @@ def _test_it():
datasets = build_datasets("Mouse", "BXD", "Hippocampus")
#print("build_datasets:", pf(datasets))
+
if __name__ == '__main__':
Conn = MySQLdb.Connect(**parse_db_uri())
Cursor = Conn.cursor()
- main() \ No newline at end of file
+ main()