diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index e40155f..af06376 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -4,7 +4,7 @@ import tempfile
from pathlib import Path
from functools import reduce
from datetime import datetime
-from typing import Optional, Iterable
+from typing import Union, Optional, Iterable
import MySQLdb as mdb
from MySQLdb.cursors import Cursor, DictCursor
@@ -248,6 +248,68 @@ def phenotypes_data(conn: mdb.Connection,
return tuple(dict(row) for row in cursor.fetchall())
+def phenotypes_vector_data(
+ conn: mdb.Connection,
+ species_id: int,
+ population_id: int,
+ xref_ids: tuple[int, ...] = tuple(),
+ offset: int = 0,
+ limit: Optional[int] = None
+) -> dict[tuple[int, int, int], dict[str, Union[int, float]]]:
+    """Retrieve the vector data values for traits in the database.
+
+    The keys of the returned dict are (species_id, population_id, xref_id)
+    tuples; each value maps a strain's name to that strain's phenotype value.
+    """
+ _params = (species_id, population_id)
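+    # Fetch all phenotype values for the population, joined to their strains.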
+ _query = ("SELECT "
+ "Species.Id AS SpeciesId, iset.Id AS InbredSetId, "
+ "pxr.Id AS xref_id, pdata.*, Strain.Id AS StrainId, "
+ "Strain.Name AS StrainName "
+ "FROM "
+ "Species INNER JOIN InbredSet AS iset "
+ "ON Species.Id=iset.SpeciesId "
+ "INNER JOIN PublishXRef AS pxr "
+ "ON iset.Id=pxr.InbredSetId "
+ "INNER JOIN PublishData AS pdata "
+ "ON pxr.DataId=pdata.Id "
+ "INNER JOIN Strain "
+ "ON pdata.StrainId=Strain.Id "
+ "WHERE Species.Id=%s AND iset.Id=%s")
+ if len(xref_ids) > 0:
+ _paramstr = ", ".join(["%s"] * len(xref_ids))
+ _query = _query + f" AND pxr.Id IN ({_paramstr})"
+ _params = _params + xref_ids
+
+ def __organise__(acc, row):
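+        # Group rows by their (species, population, xref) key, mapping each
+        # strain's name to its value under that key.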
+ _rowid = (species_id, population_id, row["xref_id"])
+ _phenodata = {
+ **acc.get(
+ _rowid, {
+ "species_id": species_id,
+ "population_id": population_id,
+ "xref_id": row["xref_id"]
+ }),
+ row["StrainName"]: row["value"]
+ }
+ return {
+ **acc,
+ _rowid: _phenodata
+ }
+
+ with conn.cursor(cursorclass=DictCursor) as cursor:
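+        # Pagination (LIMIT/OFFSET) is applied only when a limit is provided.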
+ cursor.execute(
+ _query + (f" LIMIT {limit} OFFSET {offset}" if bool(limit) else ""),
+ _params)
+ debug_query(cursor, logger)
+ return reduce(__organise__, cursor.fetchall(), {})
+
+
def save_new_dataset(cursor: Cursor,
population_id: int,
dataset_name: str,