aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations/datasets.py
diff options
context:
space:
mode:
authorAlexander Kabui2021-03-30 18:13:35 +0300
committerAlexander Kabui2021-03-30 18:13:35 +0300
commitb67938a6730e0dc557f0d4aa978e0b9aa9211772 (patch)
treefe5db4a91a8d325287271b132afe53533e1b6dd0 /gn3/computations/datasets.py
parente6f10522833cbd75441766e5b8656b3f5925d6d7 (diff)
downloadgenenetwork3-b67938a6730e0dc557f0d4aa978e0b9aa9211772.tar.gz
initial commit for creating dataset
Diffstat (limited to 'gn3/computations/datasets.py')
-rw-r--r--gn3/computations/datasets.py66
1 files changed, 66 insertions, 0 deletions
diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py
new file mode 100644
index 0000000..92a7bdf
--- /dev/null
+++ b/gn3/computations/datasets.py
@@ -0,0 +1,66 @@
+"""Module containing all the code related to datasets."""
+from unittest import mock
+
+from typing import Optional
+from typing import List
+
+
def retrieve_trait_sample_data(dataset_id,
                               dataset_type: str,
                               trait_name: str,
                               database_instance=None) -> List:
    """Fetch the sample rows recorded for one trait of a dataset.

    Args:
        dataset_id: Value substituted into the ``PublishFreeze.Id``
            placeholder of the query.
        dataset_type: Kind of dataset; only ``"Publish"`` is handled here,
            every other value yields an empty list.
        trait_name: Value substituted into the ``PublishXRef.Id``
            placeholder of the query.
        database_instance: Optional object exposing ``cursor()``. When
            omitted, a ``mock.Mock()`` placeholder is used, exactly as the
            function did before this parameter existed.

    Returns:
        Whatever ``fetch_from_db_sample_data`` returns for the formatted
        query, or ``[]`` when no query is available for ``dataset_type``.
    """
    dataset_query = get_query_for_dataset_sample(dataset_type)

    # Guard clause: nothing to run unless a query exists and the dataset is
    # of the single type this function knows how to format.
    if not dataset_query or dataset_type != "Publish":
        return []

    if database_instance is None:
        # TODO: callers should pass a real connection; the mock placeholder
        # is kept only so existing call sites behave as before.
        database_instance = mock.Mock()

    # NOTE(review): the values are interpolated straight into the SQL text.
    # If trait_name/dataset_id can come from untrusted input this is an
    # injection risk; prefer handing them to the driver as bound parameters.
    formatted_query = dataset_query % (trait_name, dataset_id)
    return fetch_from_db_sample_data(formatted_query, database_instance)
+
+
def fetch_from_db_sample_data(formatted_query: str,
                              database_instance,
                              query_params=None) -> List:
    """Run a query against the database and return every resulting row.

    Args:
        formatted_query: SQL text to execute.
        database_instance: Object exposing ``cursor()``; the cursor must
            provide ``execute``, ``fetchall`` and ``close``.
        query_params: Optional parameters handed to ``cursor.execute`` as
            its second argument, so values can be bound by the driver
            instead of being formatted into the SQL string. When ``None``
            (the default) ``execute`` is called with the query only.

    Returns:
        The value returned by ``cursor.fetchall()``.

    Raises:
        Any exception raised by the cursor is propagated unchanged; the
        cursor is closed first.
    """
    cursor = database_instance.cursor()
    try:
        if query_params is None:
            cursor.execute(formatted_query)
        else:
            cursor.execute(formatted_query, query_params)
        return cursor.fetchall()
    finally:
        # Always release the cursor, including when execute/fetchall fail.
        cursor.close()
+
+
def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
    """Return the sample-data SQL template registered for a dataset type.

    The template carries two ``%s`` placeholders: the first is compared
    with ``PublishXRef.Id`` and the second with ``PublishFreeze.Id``.
    ``None`` is returned for any dataset type without a registered query.
    """
    publish_sample_sql = """
    SELECT
    Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
    FROM
    (PublishData, Strain, PublishXRef, PublishFreeze)
    left join PublishSE on
    (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
    left join NStrain on
    (NStrain.DataId = PublishData.Id AND
    NStrain.StrainId = PublishData.StrainId)
    WHERE
    PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
    PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
    PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
    Order BY
    Strain.Name
    """

    # Lookup table keyed by dataset type; unknown types fall through to None.
    queries_by_type = {"Publish": publish_sample_sql}
    return queries_by_type.get(dataset_type)