"""Database and utility functions for phenotypes.""" from typing import Optional import MySQLdb as mdb from MySQLdb.cursors import DictCursor from uploader.db_utils import debug_query def datasets_by_population( conn: mdb.Connection, species_id: int, population_id: int ) -> tuple[dict, ...]: """Retrieve all of a population's phenotype studies.""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( "SELECT s.SpeciesId, pf.* FROM Species AS s " "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId " "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId " "WHERE s.Id=%s AND iset.Id=%s;", (species_id, population_id)) return tuple(dict(row) for row in cursor.fetchall()) def dataset_by_id(conn: mdb.Connection, species_id: int, population_id: int, dataset_id: int) -> dict: """Fetch dataset details by identifier""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( "SELECT s.SpeciesId, pf.* FROM Species AS s " "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId " "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId " "WHERE s.Id=%s AND iset.Id=%s AND pf.Id=%s", (species_id, population_id, dataset_id)) return dict(cursor.fetchone()) def phenotypes_count(conn: mdb.Connection, population_id: int, dataset_id: int) -> int: """Count the number of phenotypes in the dataset.""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( "SELECT COUNT(*) AS total_phenos FROM Phenotype AS pheno " "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId " "WHERE pxr.InbredSetId=%s AND pf.Id=%s", (population_id, dataset_id)) return int(cursor.fetchone()["total_phenos"]) def dataset_phenotypes(conn: mdb.Connection, population_id: int, dataset_id: int, offset: int = 0, limit: Optional[int] = None) -> tuple[dict, ...]: """Fetch the actual phenotypes.""" _query = ( "SELECT pheno.*, pxr.Id, ist.InbredSetCode FROM Phenotype AS pheno " "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId " "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id " "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(_query, (population_id, dataset_id)) debug_query(cursor) return tuple(dict(row) for row in cursor.fetchall()) def phenotypes_data(conn: mdb.Connection, population_id: int, dataset_id: int, offset: int = 0, limit: Optional[int] = None) -> tuple[dict, ...]: """Fetch the data for the phenotypes.""" # — Phenotype -> PublishXRef -> PublishData -> Strain -> StrainXRef -> PublishFreeze _query = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " "FROM Phenotype AS pheno " "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " "INNER JOIN Strain AS str ON pd.StrainId=str.Id " "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(_query, (population_id, dataset_id)) debug_query(cursor) return tuple(dict(row) for row in cursor.fetchall()) def phenotypes_se(conn: mdb.Connection, dataset_id: int) -> tuple[dict, ...]: """Fetch the standard errors for the phenotypes.""" return tuple() def phenotypes_sample_counts(conn: mdb.Connection, dataset_id: int) -> tuple[dict, ...]: """Fetch the standard errors for the phenotypes.""" return tuple()