aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations/datasets.py
blob: 6df57775dfd1a73252abd39e5bb497446d3a1d9d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""module contains the code all related to datasets"""
from unittest import mock

from typing import Optional
from typing import List


def retrieve_trait_sample_data(dataset,
                               trait_name: str,
                               group_species_id=None,) -> List:
    """given the dataset id and trait_name fetch the\
    sample_name,value from the dataset"""

    # should pass the db as arg all  do a setup

    (dataset_name, dataset_id, dataset_type) = (dataset.get("name"), dataset.get(
        "id"), dataset.get("type"))

    dataset_query = get_query_for_dataset_sample(dataset_type)
    results = []
    sample_query_values = {
        "Publish": (trait_name, dataset_id),
        "Geno": (group_species_id, trait_name, dataset_name),
        "ProbeSet": (trait_name, dataset_name)
    }

    if dataset_query:
        formatted_query = dataset_query % sample_query_values[dataset_type]
        results = fetch_from_db_sample_data(formatted_query, mock.Mock())

    return results


def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List:
    """this is the function that does the actual fetching of\
    results from the database"""
    cursor = database_instance.cursor()
    cursor.execute(formatted_query)
    results = cursor.fetchall()

    cursor.close()

    return results


def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
    """this functions contains querys for\
    getting sample data from the db depending in
    dataset"""
    dataset_query = {}

    pheno_query = """
                SELECT
                        Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
                FROM
                        (PublishData, Strain, PublishXRef, PublishFreeze)
                left join PublishSE on
                        (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
                left join NStrain on
                        (NStrain.DataId = PublishData.Id AND
                        NStrain.StrainId = PublishData.StrainId)
                WHERE
                        PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
                        PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
                        PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
                Order BY
                        Strain.Name
                """
    geno_query = """
                SELECT
                        Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
                FROM
                        (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
                left join GenoSE on
                        (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
                WHERE
                        Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
                        GenoXRef.GenoFreezeId = GenoFreeze.Id AND
                        GenoFreeze.Name = %s AND
                        GenoXRef.DataId = GenoData.Id AND
                        GenoData.StrainId = Strain.Id
                Order BY
                        Strain.Name
                """

    probeset_query = """
                SELECT
                        Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
                FROM
                        (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
                left join ProbeSetSE on
                        (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
                left join NStrain on
                        (NStrain.DataId = ProbeSetData.Id AND
                        NStrain.StrainId = ProbeSetData.StrainId)
                WHERE
                        ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                        ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                        ProbeSetFreeze.Name = '%s' AND
                        ProbeSetXRef.DataId = ProbeSetData.Id AND
                        ProbeSetData.StrainId = Strain.Id
                Order BY
                        Strain.Name
                """

    dataset_query["Publish"] = pheno_query
    dataset_query["Geno"] = geno_query
    dataset_query["ProbeSet"] = probeset_query

    return dataset_query.get(dataset_type)