aboutsummaryrefslogtreecommitdiff
path: root/gn3/db/correlations.py
blob: 67cfef923be9146e3a2b7a991ce960d7f9936a94 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""
This module will hold functions that are used in the (partial) correlations
feature to access the database to retrieve data needed for computations.
"""

from typing import Any

from gn3.random import random_string
from gn3.db.species import translate_to_mouse_gene_id

def get_filename(target_db_name: str, conn: Any) -> str:
    """
    Retrieve the name of the reference database file with which correlations are
    computed.

    This is a migration of the
    `web.webqtl.correlation.CorrelationPage.getFileName` function in
    GeneNetwork1.
    """
    with conn.cursor() as cursor:
        cursor.execute(
            "SELECT Id, FullName from ProbeSetFreeze WHERE Name-%s",
            target_db_name)
        result = cursor.fetchone()
        if result:
            return "ProbeSetFreezeId_{tid}_FullName_{fname}.txt".format(
                tid=result[0],
                fname=result[1].replace(' ', '_').replace('/', '_'))

    return ""

def build_temporary_literature_table(
        species: str, gene_id: int, return_number: int, conn: Any) -> str:
    """
    Build and populate a temporary table to hold the literature correlation data
    to be used in computations.

    "This is a migration of the
    `web.webqtl.correlation.CorrelationPage.getTempLiteratureTable` function in
    GeneNetwork1.
    """
    def __translated_species_id(row, cursor):
        if species == "mouse":
            return row[1]
        query = {
            "rat": "SELECT rat FROM GeneIDXRef WHERE mouse=%s",
            "human": "SELECT human FROM GeneIDXRef WHERE mouse=%d"}
        if species in query.keys():
            cursor.execute(query[species], row[1])
            record = cursor.fetchone()
            if record:
                return record[0]
            return None
        return None

    temp_table_name = f"TOPLITERATURE{random_string(8)}"
    with conn.cursor as cursor:
        mouse_geneid = translate_to_mouse_gene_id(species, gene_id, conn)
        data_query = (
            "SELECT GeneId1, GeneId2, value FROM LCorrRamin3 "
            "WHERE GeneId1 = %(mouse_gene_id)s "
            "UNION ALL "
            "SELECT GeneId2, GeneId1, value FROM LCorrRamin3 "
            "WHERE GeneId2 = %(mouse_gene_id)s "
            "AND GeneId1 != %(mouse_gene_id)s")
        cursor.execute(
            (f"CREATE TEMPORARY TABLE {temp_table_name} ("
             "GeneId1 int(12) unsigned, "
             "GeneId2 int(12) unsigned PRIMARY KEY, "
             "value double)"))
        cursor.execute(data_query, mouse_gene_id=mouse_geneid)
        literature_data = [
            {"GeneId1": row[0], "GeneId2": row[1], "value": row[2]}
            for row in cursor.fetchall()
            if __translated_species_id(row, cursor)]

        cursor.execute(
            (f"INSERT INTO {temp_table_name} "
             "VALUES (%(GeneId1)s, %(GeneId2)s, %(value)s)"),
            literature_data[0:(2 * return_number)])

    return temp_table_name

def fetch_geno_literature_correlations(temp_table: str) -> str:
    """
    Helper function for `fetch_literature_correlations` below, to build query
    for `Geno*` tables.
    """
    return (
        f"SELECT Geno.Name, {temp_table}.value "
        "FROM Geno, GenoXRef, GenoFreeze "
        f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId "
        "WHERE ProbeSet.GeneId IS NOT NULL "
        f"AND {temp_table}.value IS NOT NULL "
        "AND GenoXRef.GenoFreezeId = GenoFreeze.Id "
        "AND GenoFreeze.Name = %(db_name)s "
        "AND Geno.Id=GenoXRef.GenoId "
        "ORDER BY Geno.Id")

def fetch_probeset_literature_correlations(temp_table: str) -> str:
    """
    Helper function for `fetch_literature_correlations` below, to build query
    for `ProbeSet*` tables.
    """
    return (
        f"SELECT ProbeSet.Name, {temp_table}.value "
        "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze "
        "LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId "
        "WHERE ProbeSet.GeneId IS NOT NULL "
        "AND {temp_table}.value IS NOT NULL "
        "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
        "AND ProbeSetFreeze.Name = %(db_name)s "
        "AND ProbeSet.Id=ProbeSetXRef.ProbeSetId "
        "ORDER BY ProbeSet.Id")

def fetch_literature_correlations(
        species: str, gene_id: int, dataset: dict, return_number: int,
        conn: Any) -> dict:
    """
    Gather the literature correlation data and pair it with trait id string(s).

    This is a migration of the
    `web.webqtl.correlation.CorrelationPage.fetchLitCorrelations` function in
    GeneNetwork1.
    """
    temp_table = build_temporary_literature_table(
        species, gene_id, return_number, conn)
    query_fns = {
        "Geno": fetch_geno_literature_correlations,
        # "Temp": fetch_temp_literature_correlations,
        # "Publish": fetch_publish_literature_correlations,
        "ProbeSet": fetch_probeset_literature_correlations}
    with conn.cursor as cursor:
        cursor.execute(
            query_fns[dataset["dataset_type"]](temp_table),
            db_name=dataset["dataset_name"])
        results = cursor.fetchall()
        cursor.execute("DROP TEMPORARY TABLE %s", temp_table)
        return dict(results) # {trait_name: lit_corr for trait_name, lit_corr in results}