# Originally added by commit 0e42d84dbeee042e3ba9086e8edf5160eb45ff9c
# ("Add function for retrieving samplelist from .geno file", zsloan,
# 2023-04-13) to gn3/db/datasets.py.
from typing import Any


def retrieve_sample_list(
        group: str, connection: Any) -> list:
    """
    Get the sample list for a group (a category that datasets belong to)

    Currently it is fetched from the .geno files, since that's the only place
    the "official" sample list is stored

    The file read is ``<RQTL_WRAPPER_CMD>/<group>.geno``, where
    ``RQTL_WRAPPER_CMD`` is looked up in ``current_app.config``.  The first
    line that is neither blank nor starts with ``#`` or ``@`` is treated as
    the tab-separated header row.  Sample names start at the fifth column
    when the fourth column is ``Mb`` and at the fourth column otherwise.

    Parameters:
        group: name of the group; used as the stem of the .geno file name.
        connection: not used by this function; kept so existing callers that
            pass a connection keep working.

    Returns:
        The list of sample names taken from the header row, or an empty list
        when the file does not exist, contains no header row, or the header
        row has no sample columns.
    """
    # NOTE(review): `current_app` is expected to be available at module level
    # (presumably `from flask import current_app`) -- it is not imported in
    # the part of the module visible here; confirm.
    # NOTE(review): the config key "RQTL_WRAPPER_CMD" is used as the directory
    # holding the .geno files -- confirm this is the intended setting.

    # Imported locally because the visible part of the module does not
    # import `os`.
    import os

    genofile_path = (
        current_app.config.get("RQTL_WRAPPER_CMD") + "/" + group + ".geno")
    if not os.path.isfile(genofile_path):
        # No genotype file for this group: report "no samples" instead of
        # failing on an unassigned result.
        return []

    # The context manager guarantees the file handle is closed, including
    # when we stop reading after the header row.
    with open(genofile_path, encoding="utf-8") as genofile:
        stripped_lines = (raw_line.strip() for raw_line in genofile)
        header = next(
            (text for text in stripped_lines
             if text and not text.startswith(("#", "@"))),
            None)

    if header is None:
        # Empty file, or nothing but blank/comment/metadata lines.
        return []

    headers = header.split("\t")
    # Slicing (rather than indexing) keeps short header rows from raising
    # IndexError; they simply yield an empty sample list.
    if headers[3:4] == ["Mb"]:
        return headers[4:]
    return headers[3:]