From 5b33832cbb6a7eb40cd87128efc3bcf2b5a54726 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 17 Oct 2024 14:32:22 -0500 Subject: Fetch samples from database Fetch the samples from the database. These will be used to verify that the samples in the phenotype files already exist in the database and are valid. --- scripts/rqtl2/phenotypes_qc.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index e495a97..d565be2 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -3,6 +3,7 @@ import sys import tempfile from pathlib import Path from zipfile import ZipFile +from functools import reduce import multiprocessing as mproc from logging import Logger, getLogger, StreamHandler @@ -13,6 +14,8 @@ from r_qtl import r_qtl2_qc as rqc from r_qtl import exceptions as rqe from uploader.files import sha256_digest_over_file +from uploader.samples.models import samples_by_species_and_population + from scripts.rqtl2.entry import build_main from scripts.rqtl2.cli_parser import add_bundle_argument from scripts.cli_parser import init_cli_parser, add_global_data_arguments @@ -144,6 +147,15 @@ def run_qc(# pylint: disable=[too-many-arguments] ((ftype, cdata, extractiondir) for ftype in ("pheno", "phenocovar", "phenose", "phenonum"))) + # - Fetch samples/individuals from database. + samples = tuple( + item for item in set(reduce( + lambda acc, item: acc + ( + item["Name"], item["Name2"], item["Symbol"], item["Alias"]), + samples_by_species_and_population(dbconn, speciesid, populationid), + tuple())) + if bool(item)) + # - Check that `description` and `units` is present in phenocovar for # all phenotypes # - Check all phenotypes in pheno files exist in phenocovar files -- cgit v1.2.3