aboutsummaryrefslogtreecommitdiff
path: root/qc_app/db/datasets.py
blob: bf5ac251f943978b57c96cbdb945ceab95bf2ab8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""Functions for accessing the database relating to datasets."""
from datetime import date
from typing import Optional

import MySQLdb as mdb
from MySQLdb.cursors import DictCursor

def geno_datasets_by_species_and_population(
        conn: mdb.Connection,
        speciesid: int,
        populationid: int) -> tuple[dict, ...]:
    """Retrieve all genotypes datasets by species and population"""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute(
            "SELECT gf.* FROM InbredSet AS iset INNER JOIN GenoFreeze AS gf "
            "ON iset.InbredSetId=gf.InbredSetId "
            "WHERE iset.SpeciesId=%(sid)s AND iset.InbredSetId=%(pid)s",
            {"sid": speciesid, "pid": populationid})
        return tuple(dict(row) for row in cursor.fetchall())

def geno_dataset_by_id(conn: mdb.Connection, dataset_id) -> Optional[dict]:
    """Retrieve genotype dataset by ID"""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s", (dataset_id,))
        _dataset = cursor.fetchone()
        return dict(_dataset) if bool(_dataset) else None

def probeset_studies_by_species_and_population(
        conn: mdb.Connection,
        speciesid: int,
        populationid: int) -> tuple[dict, ...]:
    """Retrieve all probesets"""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute(
            "SELECT pf.* FROM InbredSet AS iset INNER JOIN ProbeFreeze AS pf "
            "ON iset.InbredSetId=pf.InbredSetId "
            "WHERE iset.SpeciesId=%(sid)s AND iset.InbredSetId=%(pid)s",
            {"sid": speciesid, "pid": populationid})
        return tuple(dict(row) for row in cursor.fetchall())

def probeset_datasets_by_study(conn: mdb.Connection,
                               studyid: int) -> tuple[dict, ...]:
    """Retrieve all probeset databases by study."""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s",
                       (studyid,))
        return tuple(dict(row) for row in cursor.fetchall())

def probeset_study_by_id(conn: mdb.Connection, studyid) -> Optional[dict]:
    """Retrieve ProbeSet study by ID"""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM ProbeFreeze WHERE Id=%s", (studyid,))
        _study = cursor.fetchone()
        return dict(_study) if bool(_study) else None

def probeset_create_study(conn: mdb.Connection,#pylint: disable=[too-many-arguments]
                          populationid: int,
                          platformid: int,
                          tissueid: int,
                          studyname: str,
                          studyfullname: str = "",
                          studyshortname: str = ""):
    """Create a new ProbeSet study."""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        studydata = {
            "platid": platformid,
            "tissueid": tissueid,
            "name": studyname,
            "fname": studyfullname or studyname,
            "sname": studyshortname,
            "today": date.today().isoformat(),
            "popid": populationid
        }
        cursor.execute(
            """
            INSERT INTO ProbeFreeze(
              ChipId, TissueId, Name, FullName, ShortName, CreateTime,
              InbredSetId
            ) VALUES (
              %(platid)s, %(tissueid)s, %(name)s, %(fname)s, %(sname)s,
              %(today)s, %(popid)s
            )
            """,
            studydata)
        studyid = cursor.lastrowid
        cursor.execute("UPDATE ProbeFreeze SET ProbeFreezeId=%s",
                       (studyid,))
        return {**studydata, "studyid": studyid}

def probeset_create_dataset(conn: mdb.Connection,#pylint: disable=[too-many-arguments]
                            studyid: int,
                            averageid: int,
                            datasetname: str,
                            datasetfullname: str,
                            datasetshortname: str="",
                            public: bool = True,
                            datascale="log2") -> dict:
    """Create a new ProbeSet dataset."""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        dataset = {
            "studyid": studyid,
            "averageid": averageid,
            "name2": datasetname,
            "fname": datasetfullname,
            "name": datasetshortname,
            "sname": datasetshortname,
            "today": date.today().isoformat(),
            "public": 2 if public else 0,
            "authorisedusers": "williamslab",
            "datascale": datascale
        }
        cursor.execute(
            """
            INSERT INTO ProbeSetFreeze(
              ProbeFreezeId, AvgId, Name, Name2, FullName, ShortName,
              CreateTime, public, AuthorisedUsers, DataScale)
            VALUES(
              %(studyid)s, %(averageid)s, %(name)s, %(name2)s, %(fname)s,
              %(sname)s, %(today)s, %(public)s, %(authorisedusers)s,
              %(datascale)s)
            """,
            dataset)
        return {**dataset, "datasetid": cursor.lastrowid}

def probeset_dataset_by_id(conn: mdb.Connection, datasetid) -> Optional[dict]:
    """Fetch a ProbeSet dataset by its ID"""
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Id=%s", (datasetid,))
        result = cursor.fetchone()
        if bool(result):
            return dict(result)

    return None