From e650648090ff5410f15f0076b7716e9360c27a13 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Fri, 27 Sep 2024 11:46:21 -0500 Subject: Improve query for fetching a phenotype dataset's data --- uploader/phenotypes/models.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'uploader/phenotypes') diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index c3b6dfb..d99aa36 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -76,14 +76,16 @@ def phenotypes_data(conn: mdb.Connection, offset: int = 0, limit: Optional[int] = None) -> tuple[dict, ...]: """Fetch the data for the phenotypes.""" - #TODO: This query isn't exactly right, it misses some data. - # — Phenotype -> PublishXRef -> PublishData -> Strain -> PublishFreeze - _query = ("SELECT pxr.*, pd.*, str.* FROM PublishFreeze AS pf " - "INNER JOIN PublishXRef AS pxr ON pf.InbredSetId=pxr.InbredSetId " + # — Phenotype -> PublishXRef -> PublishData -> Strain -> StrainXRef -> PublishFreeze + _query = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " + "FROM Phenotype AS pheno " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " "INNER JOIN Strain AS str ON pd.StrainId=str.Id " - "WHERE pf.InbredSetId=%s AND pf.Id=%s " - "ORDER BY pxr.DataId ASC, str.Id ASC") + ( + "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " + "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " + "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(_query, (population_id, dataset_id)) -- cgit v1.2.3