1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
import string
from gn2.wqflask.database import database_connection
from gn2.utility.tools import flat_files, get_setting
def load_homology(chr_name, start_mb, end_mb, source_file):
homology_list = []
with open(flat_files("homology/") + source_file) as h_file:
current_chr = 0
for line in h_file:
line_items = line.split()
this_dict = {
"ref_chr": line_items[2][3:],
"ref_strand": line_items[4],
"ref_start": float(line_items[5])/1000000,
"ref_end": float(line_items[6])/1000000,
"query_chr": line_items[7][3:],
"query_strand": line_items[9],
"query_start": float(line_items[10])/1000000,
"query_end": float(line_items[11])/1000000
}
if str(this_dict["ref_chr"]) == str(chr_name) and \
this_dict["ref_start"] < end_mb and \
this_dict["ref_end"] > start_mb:
homology_list.append(this_dict)
return homology_list
def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'):
assembly_map = {
"mouse": "mm10",
"rat": "rn7"
}
def append_assembly(fetch_fields, species):
query_fields = []
for field in fetch_fields:
if field in ['Chr', 'TxStart', 'TxEnd', 'Strand']:
query_fields.append(field + "_" + assembly_map[species])
else:
query_fields.append(field)
return query_fields
fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chr', 'TxStart', 'TxEnd',
'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
# List All Species in the Gene Table
speciesDict = {}
results = []
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute("SELECT Species.Name, GeneList081722.SpeciesId "
"FROM Species, GeneList081722 WHERE "
"GeneList081722.SpeciesId = Species.Id "
"GROUP BY GeneList081722.SpeciesId")
results = cursor.fetchall()
for item in results:
if item[0] == "rat":
speciesDict[item[0]] = (item[1], "rn7")
else:
speciesDict[item[0]] = (item[1], "mm10")
# List current Species and other Species
speciesId, assembly = speciesDict[species]
otherSpecies = [[X, speciesDict[X][0], speciesDict[X][1]] for X in list(speciesDict.keys())]
otherSpecies.remove([species, speciesId, assembly])
query_fields = append_assembly(fetchFields, species)
cursor.execute(f"SELECT {', '.join(query_fields)} FROM GeneList081722 "
"WHERE SpeciesId = %s AND "
f"Chr_{assembly}" + " = %s AND "
f"((TxStart_{assembly}" + " > %s and " + f"TxStart_{assembly}" + " <= %s) "
f"OR (TxEnd_{assembly}" + " > %s and " + f"TxEnd_{assembly}" + " <= %s)) "
f"ORDER BY TxStart_{assembly}",
(speciesId, chrName,
startMb, endMb,
startMb, endMb))
results = cursor.fetchall()
GeneList = []
if results:
for result in results:
newdict = {}
for j, item in enumerate(fetchFields):
newdict[item] = result[j]
# count SNPs if possible
if diffCol and species == 'mouse':
cursor.execute(
"SELECT count(*) FROM BXDSnpPosition "
"WHERE Chr = %s AND "
"Mb >= %s AND Mb < %s "
"AND StrainId1 = %s AND StrainId2 = %s",
(chrName, f"{newdict['TxStart']:2.6f}",
f"{newdict['TxEnd']:2.6f}",
diffCol[0], diffCol[1],))
newdict["snpCount"] = cursor.fetchone()[0]
newdict["snpDensity"] = (
newdict["snpCount"] /
(newdict["TxEnd"] - newdict["TxStart"]) / 1000.0)
else:
newdict["snpDensity"] = newdict["snpCount"] = 0
try:
newdict['GeneLength'] = 1000.0 * \
(newdict['TxEnd'] - newdict['TxStart'])
except:
pass
# load gene from other Species by the same name
for item in otherSpecies:
othSpec, othSpecId, othSpecAssembly = item
newdict2 = {}
query_fields = append_assembly(fetchFields, othSpec)
cursor.execute(
f"SELECT {', '.join(query_fields)} FROM GeneList081722 WHERE "
"SpeciesId = %s AND "
"geneSymbol= %s LIMIT 1",
(othSpecId,
newdict["GeneSymbol"]))
resultsOther = cursor.fetchone()
if resultsOther:
for j, item in enumerate(fetchFields):
newdict2[item] = resultsOther[j]
# count SNPs if possible, could be a separate function
if diffCol and othSpec == 'mouse':
cursor.execute(
"SELECT count(*) FROM BXDSnpPosition "
"WHERE Chr = %s AND Mb >= %s AND "
"Mb < %s AND StrainId1 = %s "
"AND StrainId2 = %s",
(chrName, f"{newdict['TxStart']:2.6f}",
f"{newdict['TxEnd']:2.6f}",
diffCol[0], diffCol[1]))
if snp_count := cursor.fetchone():
newdict2["snpCount"] = snp_count[0]
newdict2["snpDensity"] = (
newdict2["snpCount"]
/ (newdict2["TxEnd"] - newdict2["TxStart"])
/ 1000.0)
else:
newdict2["snpDensity"] = newdict2["snpCount"] = 0
try:
newdict2['GeneLength'] = (
1000.0 * (newdict2['TxEnd'] - newdict2['TxStart']))
except:
pass
newdict['%sGene' % othSpec] = newdict2
GeneList.append(newdict)
return GeneList
|