aboutsummaryrefslogtreecommitdiff
path: root/gn3/db/wiki.py
blob: 4721a699f446e868f9028cfd79c87bf07fd0d25f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Helper functions to access wiki entries"""

from typing import Dict, List

from MySQLdb.cursors import DictCursor


class MissingDBDataException(Exception):
    """Error due to DB missing some data"""


def _decode_dict(result: dict):
    new_result = {}
    for k, v in result.items():
        if isinstance(v, bytes):
            new_result[k] = v.decode()
        else:
            new_result[k] = v
    return new_result


def get_latest_comment(connection, comment_id: int) -> int:
    """ Latest comment is one with the highest versionId """
    cursor = connection.cursor(DictCursor)
    query = """ SELECT versionId AS version, symbol, PubMed_ID AS pubmed_ids, sp.Name AS species,
        comment, email, weburl, initial, reason
        FROM `GeneRIF` gr
		INNER JOIN Species sp USING(SpeciesId)
		WHERE gr.Id = %s
		ORDER BY versionId DESC LIMIT 1;
    """
    cursor.execute(query, (str(comment_id),))
    result = _decode_dict(cursor.fetchone())
    if (pubmed_ids := result.get("pubmed_ids")) is None:
        pubmed_ids = ""
    result["pubmed_ids"] = [x.strip() for x in pubmed_ids.split()]
    categories_query = """
        SELECT grx.GeneRIFId, grx.versionId, gc.Name FROM GeneRIFXRef grx
                INNER JOIN GeneCategory gc ON grx.GeneCategoryId=gc.Id
                WHERE GeneRIFId = %s AND versionId=%s;
    """

    cursor.execute(categories_query, (str(comment_id), result["version"]))
    categories = [_decode_dict(x) for x in cursor.fetchall()]
    result["categories"] = [x["Name"] for x in categories]
    return result


def get_species_id(cursor, species_name: str) -> int:
    """Find species id given species `Name`"""
    cursor.execute(
        "SELECT SpeciesID from Species  WHERE Name = %s", (species_name,))
    species_ids = cursor.fetchall()
    if len(species_ids) != 1:
        raise MissingDBDataException(
            f"expected 1 species with Name={species_name} but found {len(species_ids)}!"
        )
    return species_ids[0][0]


def get_next_comment_version(cursor, comment_id: int) -> int:
    """Find the version to add, usually latest_version + 1"""
    cursor.execute(
        "SELECT MAX(versionId) as version_id from GeneRIF WHERE Id = %s", (comment_id,)
    )
    latest_version = cursor.fetchone()[0]
    if latest_version is None:
        raise MissingDBDataException(
            f"No comment found with comment_id={comment_id}")
    return latest_version + 1


def get_categories_ids(cursor, categories: List[str]) -> List[int]:
    """Get the categories_ids from a list of category strings"""
    dict_cats = get_categories(cursor)
    category_ids = []
    for category in set(categories):
        cat_id = dict_cats.get(category.strip())
        if cat_id is None:
            raise MissingDBDataException(
                f"Category with Name={category} not found")
        category_ids.append(cat_id)
    return category_ids


def get_categories(cursor) -> Dict[str, int]:
    """Get all categories"""
    cursor.execute("SELECT Name, Id from GeneCategory")
    raw_categories = cursor.fetchall()
    dict_cats = dict(raw_categories)
    return dict_cats


def get_species(cursor) -> Dict[str, str]:
    """Get all species"""
    cursor.execute("SELECT Name, SpeciesName from Species")
    raw_species = cursor.fetchall()
    dict_cats = dict(raw_species)
    return dict_cats