aboutsummaryrefslogtreecommitdiff
path: root/gn2/base/data_set/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/base/data_set/__init__.py')
-rw-r--r--gn2/base/data_set/__init__.py124
1 files changed, 124 insertions, 0 deletions
diff --git a/gn2/base/data_set/__init__.py b/gn2/base/data_set/__init__.py
new file mode 100644
index 00000000..40ef1c75
--- /dev/null
+++ b/gn2/base/data_set/__init__.py
@@ -0,0 +1,124 @@
+"The data_set package ..."
+
+# builtins imports
+import json
+import pickle as pickle
+
+# 3rd-party imports
+from redis import Redis
+
+# local imports
+from .dataset import DataSet
+from gn2.base import webqtlConfig
+from gn2.utility.tools import get_setting, USE_REDIS
+from .datasettype import DatasetType
+from .tempdataset import TempDataSet
+from .datasetgroup import DatasetGroup
+from .utils import query_table_timestamp
+from .genotypedataset import GenotypeDataSet
+from .phenotypedataset import PhenotypeDataSet
+from .mrnaassaydataset import MrnaAssayDataSet
+from gn2.wqflask.database import database_connection
+
+# Used by create_database to instantiate objects
+# Each subclass will add to this
+
+DS_NAME_MAP = {
+ "Temp": "TempDataSet",
+ "Geno": "GenotypeDataSet",
+ "Publish": "PhenotypeDataSet",
+ "ProbeSet": "MrnaAssayDataSet"
+}
+
+def __dataset_type__(dataset_name):
+ """Get dataset type."""
+ if "Temp" in dataset_name:
+ return "Temp"
+ if "Geno" in dataset_name:
+ return "Geno"
+ if "Publish" in dataset_name:
+ return "Publish"
+ return "ProbeSet"
+
+def create_dataset(dataset_name, dataset_type=None,
+ get_samplelist=True, group_name=None, redis_conn=Redis()):
+ dataset_type = dataset_type or __dataset_type__(dataset_name)
+
+ dataset_ob = DS_NAME_MAP[dataset_type]
+ dataset_class = globals()[dataset_ob]
+ if dataset_type == "Temp":
+ return dataset_class(dataset_name, get_samplelist, group_name)
+ else:
+ return dataset_class(dataset_name, get_samplelist)
+
+def datasets(group_name, this_group=None, redis_conn=Redis()):
+ key = "group_dataset_menu:v2:" + group_name
+ dataset_menu = []
+ with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
+ cursor.execute('''
+ (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name
+ FROM PublishFreeze,InbredSet
+ WHERE PublishFreeze.InbredSetId = InbredSet.Id
+ and InbredSet.Name = '%s'
+ ORDER BY PublishFreeze.Id ASC)
+ UNION
+ (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name
+ FROM GenoFreeze, InbredSet
+ WHERE GenoFreeze.InbredSetId = InbredSet.Id
+ and InbredSet.Name = '%s')
+ UNION
+ (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name
+ FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue
+ WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
+ and ProbeFreeze.TissueId = Tissue.Id
+ and ProbeFreeze.InbredSetId = InbredSet.Id
+ and InbredSet.Name like %s
+ ORDER BY Tissue.Name, ProbeSetFreeze.OrderList DESC)
+ ''' % (group_name,
+ group_name,
+ "'" + group_name + "'"))
+ the_results = cursor.fetchall()
+
+ sorted_results = sorted(the_results, key=lambda kv: kv[0])
+
+ # ZS: This is kind of awkward, but need to ensure Phenotypes show up before Genotypes in dropdown
+ pheno_inserted = False
+ geno_inserted = False
+ for dataset_item in sorted_results:
+ tissue_name = dataset_item[0]
+ dataset = dataset_item[1]
+ dataset_short = dataset_item[2]
+ if tissue_name in ['#PublishFreeze', '#GenoFreeze']:
+ if tissue_name == '#PublishFreeze' and (dataset_short == group_name + 'Publish'):
+ dataset_menu.insert(
+ 0, dict(tissue=None, datasets=[(dataset, dataset_short)]))
+ pheno_inserted = True
+ elif pheno_inserted and tissue_name == '#GenoFreeze':
+ dataset_menu.insert(
+ 1, dict(tissue=None, datasets=[(dataset, dataset_short)]))
+ geno_inserted = True
+ else:
+ dataset_menu.append(
+ dict(tissue=None, datasets=[(dataset, dataset_short)]))
+ else:
+ tissue_already_exists = False
+ for i, tissue_dict in enumerate(dataset_menu):
+ if tissue_dict['tissue'] == tissue_name:
+ tissue_already_exists = True
+ break
+
+ if tissue_already_exists:
+ dataset_menu[i]['datasets'].append((dataset, dataset_short))
+ else:
+ dataset_menu.append(dict(tissue=tissue_name,
+ datasets=[(dataset, dataset_short)]))
+
+ if USE_REDIS:
+ redis_conn.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL))
+ redis_conn.expire(key, 60 * 5)
+
+ if this_group != None:
+ this_group._datasets = dataset_menu
+ return this_group._datasets
+ else:
+ return dataset_menu