diff options
Diffstat (limited to 'uploader')
| -rw-r--r-- | uploader/phenotypes/models.py | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index e40155f..af06376 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -4,7 +4,7 @@ import tempfile from pathlib import Path from functools import reduce from datetime import datetime -from typing import Optional, Iterable +from typing import Union, Optional, Iterable import MySQLdb as mdb from MySQLdb.cursors import Cursor, DictCursor @@ -248,6 +248,59 @@ def phenotypes_data(conn: mdb.Connection, return tuple(dict(row) for row in cursor.fetchall()) +def phenotypes_vector_data( + conn: mdb.Connection, + species_id: int, + population_id: int, + xref_ids: tuple[int, ...] = tuple(), + offset: int = 0, + limit: Optional[int] = None +) -> dict[tuple[int, int, int]: dict[str, Union[int,float]]]: + """Retrieve the vector data values for traits in the database.""" + _params = (species_id, population_id) + _query = ("SELECT " + "Species.Id AS SpeciesId, iset.Id AS InbredSetId, " + "pxr.Id AS xref_id, pdata.*, Strain.Id AS StrainId, " + "Strain.Name AS StrainName " + "FROM " + "Species INNER JOIN InbredSet AS iset " + "ON Species.Id=iset.SpeciesId " + "INNER JOIN PublishXRef AS pxr " + "ON iset.Id=pxr.InbredSetId " + "INNER JOIN PublishData AS pdata " + "ON pxr.DataId=pdata.Id " + "INNER JOIN Strain " + "ON pdata.StrainId=Strain.Id " + "WHERE Species.Id=%s AND iset.Id=%s") + if len(xref_ids) > 0: + _paramstr = ", ".join(["%s"] * len(xref_ids)) + _query = _query + f" AND pxr.Id IN ({_paramstr})" + _params = _params + xref_ids + + def __organise__(acc, row): + _rowid = (species_id, population_id, row["xref_id"]) + _phenodata = { + **acc.get( + _rowid, { + "species_id": species_id, + "population_id": population_id, + "xref_id": row["xref_id"] + }), + row["StrainName"]: row["value"] + } + return { + **acc, + _rowid: _phenodata + } + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + _query + (f" LIMIT {limit} OFFSET {offset}" if bool(limit) else ""), + _params) + debug_query(cursor, logger) + return reduce(__organise__, cursor.fetchall(), {}) + + def save_new_dataset(cursor: Cursor, population_id: int, dataset_name: str, |
