about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gn3/db/datasets.py 29
1 files changed, 29 insertions, 0 deletions
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index bc5467b..65ab1f8 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -4,6 +4,35 @@ This module contains functions relating to specific trait dataset manipulation
 from typing import Any
 
 
+def retrieve_sample_list(
+        group: str, connection: Any):
+    """
+    Get the sample list for a group (a category that datasets belong to)
+
+    Currently it is fetched from the .geno files, since that's the only place
+    the "official" sample list is stored.  Returns None when the group has no
+    .geno file and [] when the file has no header row.  `connection` is unused.
+    """
+    import os  # not imported at module level in the visible part of the file
+    # NOTE(review): `current_app` is not imported in the visible part either,
+    # and RQTL_WRAPPER_CMD looks like a command, not a directory -- confirm.
+    genofile_path = current_app.config.get("RQTL_WRAPPER_CMD") + "/" + group + ".geno"
+    if not os.path.isfile(genofile_path):
+        return None
+
+    headers = []
+    with open(genofile_path) as genofile:  # closes the handle on every path
+        for line in genofile:
+            line = line.strip()
+            # The header row is the first non-blank line that does not start
+            # with "#" or "@"; everything after it is ignored.
+            if line and not line.startswith(("#", "@")):
+                headers = line.split("\t")
+                break
+
+    # Samples start at index 4 when index 3 is "Mb", otherwise at index 3.
+    return headers[4:] if headers[3:4] == ["Mb"] else headers[3:]
+
 def retrieve_probeset_trait_dataset_name(
         threshold: int, name: str, connection: Any):
     """