diff options
author | zsloan | 2023-04-13 19:02:30 +0000 |
---|---|---|
committer | zsloan | 2023-05-22 17:23:08 +0000 |
commit | 0e42d84dbeee042e3ba9086e8edf5160eb45ff9c (patch) | |
tree | 58d31ee04ed78e3a0fb44274a943fe5522994c47 | |
parent | cb52df1c320463674938462a68e0a3be4279473b (diff) | |
download | genenetwork3-0e42d84dbeee042e3ba9086e8edf5160eb45ff9c.tar.gz |
Add function for retrieving samplelist from .geno file
-rw-r--r-- | gn3/db/datasets.py | 29 |
1 files changed, 29 insertions, 0 deletions
# Reconstructed from a collapsed diff excerpt of gn3/db/datasets.py
# (index bc5467b..65ab1f8, hunk @@ -4,6 +4,35 @@). The hunk adds
# `retrieve_sample_list` directly after the module's `typing` import.
import os
from typing import Any


def retrieve_sample_list(
        group: str, connection: Any) -> list:
    """
    Get the sample list for a group (a category that datasets belong to)

    Currently it is fetched from the .geno files, since that's the only place
    the "official" sample list is stored.

    The file ``<group>.geno`` is looked up in the directory named by the
    ``RQTL_WRAPPER_CMD`` application setting. Blank lines and lines starting
    with ``#`` or ``@`` are skipped; the first remaining line is taken as the
    tab-separated header row. The first three header columns are treated as
    non-sample columns, plus a fourth one when it is labelled ``Mb``;
    everything after them is returned as the sample list.

    Parameters:
        group: name of the group; used as the stem of the ``.geno`` file name.
        connection: not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The list of sample names from the header row, or an empty list when
        the file does not exist, contains no header row, or the header row
        has no sample columns.
    """
    # NOTE(review): `current_app` is not imported anywhere in the visible
    # excerpt; presumably it is Flask's `current_app` -- confirm and import it
    # at module level.
    # NOTE(review): the directory comes from the "RQTL_WRAPPER_CMD" setting,
    # whose name suggests a command path rather than a genotype directory --
    # confirm this is the intended key.
    genofile_path = os.path.join(
        current_app.config.get("RQTL_WRAPPER_CMD"), group + ".geno")
    if not os.path.isfile(genofile_path):
        return []

    # Find the header row; the context manager guarantees the file is closed
    # even if reading fails part-way through.
    header = ""
    with open(genofile_path, encoding="utf-8") as genofile:
        for raw_line in genofile:
            candidate = raw_line.strip()
            if candidate and not candidate.startswith(("#", "@")):
                header = candidate
                break

    if not header:
        # Empty file, or nothing but blank/comment/metadata lines.
        return []

    headers = header.split("\t")
    # Guard the index so a short header yields an empty list, not IndexError.
    first_sample = 4 if len(headers) > 3 and headers[3] == "Mb" else 3
    return headers[first_sample:]


# The excerpt continues with
# `def retrieve_probeset_trait_dataset_name(threshold: int, name: str, connection: Any):`,
# whose body lies outside the visible source and is left untouched.