1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
"Dataset Group class ..."
import os
import json
from base import webqtlConfig
from .markers import Markers, HumanMarkers
from utility import webqtlUtil
from utility import gen_geno_ob
from db import webqtlDatabaseFunction
from maintenance import get_group_samplelists
from wqflask.database import database_connection
from utility.tools import (
locate,
USE_REDIS,
flat_files,
get_setting,
flat_file_exists,
locate_ignore_error)
class DatasetGroup:
    """A group of samples that one or more datasets belong to.

    Each group has multiple datasets; each species has multiple groups.
    For example, Mouse has multiple groups (BXD, BXA, etc), and each group
    has multiple datasets associated with it.
    """

    # Mapping tools offered for each InbredSet.MappingMethodId value.
    # Keys are strings because the ids are compared as strings.
    _MAPPING_NAMES_BY_ID = {
        "1": ("GEMMA", "QTLReaper", "R/qtl"),
        "2": ("GEMMA",),
        "3": ("R/qtl",),
        "4": ("GEMMA", "PLINK"),
    }

    def __init__(self, dataset, name=None):
        """Load the group's row from ``InbredSet`` and initialise its state.

        Sets ``self.name``, ``self.id``, ``self.genetic_type`` and
        ``self.code`` from the database row.  When no row is found the name
        falls back to ``name`` (or ``dataset.name``) and the other three
        attributes are ``None``.

        Args:
            dataset: Object providing ``query_for_group`` (a query taking the
                dataset name as its single parameter) and ``name``.  Used for
                the lookup only when ``name`` is not given.
            name: Optional group name to look up directly.
        """
        # Defaults so these attributes always exist, even without a DB row.
        self.id = None
        self.genetic_type = None
        self.code = None

        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
            if name:
                cursor.execute(
                    "SELECT InbredSet.Name, "
                    "InbredSet.Id, "
                    "InbredSet.GeneticType, "
                    "InbredSet.InbredSetCode "
                    "FROM InbredSet WHERE Name = %s",
                    (name,))
            else:
                cursor.execute(dataset.query_for_group,
                               (dataset.name,))
            results = cursor.fetchone()

        if results:
            (self.name, self.id, self.genetic_type, self.code) = results
        else:
            self.name = name or dataset.name

        # BXD300 is handled under the BXD group name.
        if self.name == 'BXD300':
            self.name = "BXD"

        self.f1list = None
        self.parlist = None
        self.get_f1_parent_strains()

        self.mapping_id, self.mapping_names = self.get_mapping_methods()
        self.species = webqtlDatabaseFunction.retrieve_species(self.name)

        self.incparentsf1 = False
        self.allsamples = None
        self._datasets = None
        self.genofile = None

    def get_mapping_methods(self):
        """Return ``(mapping_id, mapping_names)`` for this group.

        ``mapping_id`` is the ``MappingMethodId`` stored for the group, or an
        empty tuple when none is stored.  ``mapping_names`` is the list of
        mapping tools for that id, or an empty list for unknown ids.
        """
        mapping_id = ()
        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
            cursor.execute(
                "SELECT MappingMethodId FROM "
                "InbredSet WHERE Name= %s",
                (self.name,))
            row = cursor.fetchone()
            if row and row[0]:
                mapping_id = row[0]

        # Hand back a fresh list so callers can mutate it safely.
        mapping_names = list(self._MAPPING_NAMES_BY_ID.get(mapping_id, ()))
        return mapping_id, mapping_names

    def get_markers(self):
        """Create ``self.markers`` for this group.

        ``HumanMarkers`` is used when a ``<name>.bed`` file exists in the
        "mapping" flat-file directory; otherwise ``Markers`` is used.
        """
        has_bed_file = (
            flat_file_exists("mapping")
            and os.path.isfile(
                os.path.join(flat_files("mapping"), self.name + ".bed")))
        marker_class = HumanMarkers if has_bed_file else Markers

        if self.genofile:
            # Drop the last five characters of the genofile name
            # (presumably the ".geno" extension -- confirm against callers).
            self.markers = marker_class(self.genofile[:-5])
        else:
            self.markers = marker_class(self.name)

    def get_f1_parent_strains(self):
        """Fill ``self.f1list`` and ``self.parlist`` from ``webqtlUtil.ParInfo``.

        Groups without a ``ParInfo`` entry leave both attributes untouched.
        """
        try:
            # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py;
            f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name]
        except KeyError:
            return

        if f1 and f12:
            self.f1list = [f1, f12]
        if maternal and paternal:
            self.parlist = [maternal, paternal]

    def get_study_samplelists(self):
        """Return the parsed ``<name>.json`` study sample list.

        Returns an empty list when the file cannot be located or opened.
        Errors raised while parsing the JSON are propagated.
        """
        study_sample_file = locate_ignore_error(
            self.name + ".json", 'study_sample_lists')
        try:
            handle = open(study_sample_file)
        except (OSError, TypeError, ValueError):
            # Missing or unreadable file, or no usable path was located.
            return []
        with handle:
            return json.load(handle)

    def get_genofiles(self):
        """Return the ``'genofile'`` entry of the group's genotype JSON file.

        The file is ``<GENODIR>/<name>.json``.  Returns ``None`` when it
        cannot be opened.  JSON errors and a missing ``'genofile'`` key are
        propagated.
        """
        jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name)
        try:
            handle = open(jsonfile)
        except (OSError, TypeError, ValueError):
            return None
        with handle:
            jsondata = json.load(handle)
        return jsondata['genofile']

    def get_samplelist(self, redis_conn):
        """Populate ``self.samplelist``, using Redis as a cache when enabled.

        On a cache miss the list is read from the group's ``.geno`` file
        (``None`` when that file cannot be located) and, if ``USE_REDIS`` is
        set, stored under the cache key with a ``60 * 5`` expiry.

        Args:
            redis_conn: Connection object providing ``get``, ``set`` and
                ``expire``; only used when ``USE_REDIS`` is truthy.
        """
        key = "samplelist:v3:" + self.name
        cached = redis_conn.get(key) if USE_REDIS else None
        if cached is not None:
            self.samplelist = json.loads(cached)
            return

        genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype')
        if genotype_fn:
            self.samplelist = get_group_samplelists.get_samplelist(
                "geno", genotype_fn)
        else:
            self.samplelist = None

        if USE_REDIS:
            redis_conn.set(key, json.dumps(self.samplelist))
            redis_conn.expire(key, 60 * 5)

    def all_samples_ordered(self):
        """Return parents, then F1s, then samples as one flat list.

        Any of the three lists that is ``None`` or empty is skipped.
        """
        ordered = []
        for group in (self.parlist, self.f1list, self.samplelist):
            if group:
                ordered.extend(group)
        return ordered

    def read_genotype_file(self, use_reaper=False):
        '''Read genotype from .geno file instead of database

        Also sets ``self.samplelist`` from the returned genotype's ``prgy``
        and resets ``self.incparentsf1`` when parents/F1 are not included.

        Args:
            use_reaper: Unused; kept so existing callers keep working.

        Returns:
            The genotype object built by ``gen_geno_ob.genotype``, with the
            parental strains added when ``self.incparentsf1`` is set and the
            genotype type allows it.
        '''
        # genotype_1 is Dataset Object without parents and f1
        # genotype_2 is Dataset Object with parents and f1 (not for intercross)

        # reaper barfs on unicode filenames, so here we ensure it's a string
        if self.genofile:
            if "RData" in self.genofile:
                # ZS: This is a temporary fix; I need to change the way the
                # JSON files that point to multiple genotype files are
                # structured to point to other file types like RData
                full_filename = str(
                    locate(self.genofile.split(".")[0] + ".geno", 'genotype'))
            else:
                full_filename = str(locate(self.genofile, 'genotype'))
        else:
            full_filename = str(locate(self.name + '.geno', 'genotype'))

        genotype_1 = gen_geno_ob.genotype(full_filename)

        if genotype_1.type == "group" and self.parlist:
            genotype_2 = genotype_1.add(
                Mat=self.parlist[0], Pat=self.parlist[1])  # , F1=_f1)
        else:
            genotype_2 = genotype_1

        # determine default genotype object
        if self.incparentsf1 and genotype_1.type != "intercross":
            genotype = genotype_2
        else:
            self.incparentsf1 = 0
            genotype = genotype_1

        self.samplelist = list(genotype.prgy)

        return genotype
|