1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
"""This contains all the necessary functions that are required to add traits
to the published database"""
from dataclasses import dataclass
from typing import Any, Dict, Optional
@dataclass(frozen=True)
class Riset:
"""Class for keeping track of riset. A riset is a group e.g. rat HSNIH-Palmer,
BXD
"""
name: Optional[str]
r_id: Optional[int]
@dataclass(frozen=True)
class WebqtlCaseData:
"""Class for keeping track of one case data in one trait"""
value: Optional[float] = None
variance: Optional[float] = None
count: Optional[int] = None # Number of Individuals
def __str__(self):
_str = ""
if self.value:
_str += f"value={self.value:.3f}"
if self.variance:
_str += f" variance={self.variance:.3f}"
if self.count:
_str += " n_data={self.count}"
return _str
def lookup_webqtldataset_name(riset_name: str, conn: Any):
"""Given a group name(riset), return it's name e.g. BXDPublish,
HLCPublish."""
with conn.cursor() as cursor:
cursor.execute(
"SELECT PublishFreeze.Name FROM "
"PublishFreeze, InbredSet WHERE "
"PublishFreeze.InbredSetId = InbredSet.Id "
"AND InbredSet.Name = '%s'" % riset_name)
_result, *_ = cursor.fetchone()
return _result
def get_riset(data_type: str, name: str, conn: Any):
"""Get the groups given the data type and it's PublishFreeze or GenoFreeze
name
"""
query, _name, _id = None, None, None
if data_type == "Publish":
query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, "
"PublishFreeze WHERE PublishFreeze.InbredSetId = "
"InbredSet.Id AND PublishFreeze.Name = '%s'" % name)
elif data_type == "Geno":
query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, "
"GenoFreeze WHERE GenoFreeze.InbredSetId = "
"InbredSet.Id AND GenoFreeze.Name = '%s'" % name)
elif data_type == "ProbeSet":
query = ("SELECT InbredSet.Name, InbredSet.Id FROM "
"InbredSet, ProbeSetFreeze, ProbeFreeze WHERE "
"ProbeFreeze.InbredSetId = InbredSet.Id AND "
"ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND "
"ProbeSetFreeze.Name = '%s'" % name)
if query:
with conn.cursor() as cursor:
_name, _id = cursor.fetchone()
if _name == "BXD300":
_name = "BXD"
return Riset(_name, _id)
def insert_publication(pubmed_id: int, publication: Optional[Dict],
conn: Any):
"""Creates a new publication record if it's not available"""
sql = ("SELECT Id FROM Publication where "
"PubMed_ID = %d" % pubmed_id)
_id = None
with conn.cursor() as cursor:
cursor.execute(sql)
_id = cursor.fetchone()
if not _id and publication:
# The Publication contains the fields: 'authors', 'title', 'abstract',
# 'journal','volume','pages','month','year'
insert_query = ("INSERT into Publication (%s) Values (%s)" %
(", ".join(publication.keys()),
", ".join(['%s'] * len(publication))))
with conn.cursor() as cursor:
cursor.execute(insert_query, tuple(publication.values()))
def retrieve_type_trait_name(trait_type, threshold, name, connection):
"""
Retrieve the name of a trait given the trait's name
This is extracted from the `webqtlDataset.retrieveName` function as is
implemented at
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169
"""
columns = "Id, Name, FullName, ShortName{}".format(
", DataScale" if trait_type == "ProbeSet" else "")
query = (
"SELECT {columns} "
"FROM {trait_type}Freeze "
"WHERE "
"public > %(threshold)s "
"AND "
"(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)").format(
columns=columns, trait_type=trait_type)
with connection.cursor() as cursor:
cursor.execute(query, {"threshold": threshold, "name": name})
return cursor.fetchone()
|