aboutsummaryrefslogtreecommitdiff
path: root/gn3/db/datasets.py
diff options
context:
space:
mode:
authorzsloan2023-04-13 19:02:30 +0000
committerzsloan2023-05-22 17:23:08 +0000
commit0e42d84dbeee042e3ba9086e8edf5160eb45ff9c (patch)
tree58d31ee04ed78e3a0fb44274a943fe5522994c47 /gn3/db/datasets.py
parentcb52df1c320463674938462a68e0a3be4279473b (diff)
downloadgenenetwork3-0e42d84dbeee042e3ba9086e8edf5160eb45ff9c.tar.gz
Add function for retrieving samplelist from .geno file
Diffstat (limited to 'gn3/db/datasets.py')
-rw-r--r--gn3/db/datasets.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index bc5467b..65ab1f8 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -4,6 +4,35 @@ This module contains functions relating to specific trait dataset manipulation
from typing import Any
def retrieve_sample_list(
        group: str, connection: Any) -> list:
    """
    Get the sample list for a group (a category that datasets belong to)

    Currently it is fetched from the .geno files, since that's the only place
    the "official" sample list is stored.

    The first line of the file that is neither blank nor a comment/metadata
    line (starting with "#" or "@") is treated as the tab-separated header
    row. The sample names are the columns that follow the three leading
    columns, or the four leading columns when the fourth one is "Mb".

    Parameters:
        group: name of the group; `<group>.geno` is the file looked up.
        connection: unused by this function; kept so that existing callers
            that pass a database connection keep working.

    Returns:
        The list of sample names, or an empty list when the file does not
        exist, has no header row, or the header row has no sample columns.
    """
    # NOTE(review): the directory is read from the "RQTL_WRAPPER_CMD" setting,
    # whose name suggests a command rather than a genotype directory --
    # confirm that this is the intended configuration key.
    genofile_path = (
        current_app.config.get("RQTL_WRAPPER_CMD") + "/" + group + ".geno")
    if not os.path.isfile(genofile_path):
        # No genotype file means there is no "official" sample list.
        return []

    # The context manager guarantees the file handle is closed, including
    # when reading or decoding fails part-way through.
    with open(genofile_path, encoding="utf-8") as genofile:
        stripped_lines = (raw_line.strip() for raw_line in genofile)
        # Falls back to "" when the file holds only blank/comment lines, so
        # that a comment line is never mistaken for the header row.
        header_line = next(
            (text for text in stripped_lines
             if text and not text.startswith(("#", "@"))),
            "")

    headers = header_line.split("\t")

    # The length check keeps a short header row from raising IndexError;
    # slicing past the end simply yields an empty list.
    if len(headers) > 3 and headers[3] == "Mb":
        return headers[4:]
    return headers[3:]
+
def retrieve_probeset_trait_dataset_name(
threshold: int, name: str, connection: Any):
"""