1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
####### To run this program do: python QTL_Reaper_v2.py 235
#Where 163 is the ProbeSetFreeze Id of the database that we want to calculate #the LRS
#!/usr/bin/python
import reaper
import MySQLdb
import contextlib
from argparse import ArgumentParser
def run_qtl_reaper(cursor, genotypeDir, ProbeSetFreezeIds):
genotype_1 = reaper.Dataset()
#####get all of the genotypes
cursor.execute('select Id, Name from InbredSet')
results = cursor.fetchall()
InbredSets = {}
for item in results:
InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno'
if ProbeSetFreezeIds:
#####convert the Ids to integer
ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds))
else:
#####get all of the dataset that need be updated
cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc')
results = cursor.fetchall()
ProbeSetFreezeIds = []
for item in results:
ProbeSetFreezeIds.append(item[0])
####human dataset can NOT use this program calculate the LRS, ignore it
if ProbeSetFreezeIds.__contains__(215):
ProbeSetFreezeIds.remove(215)
#output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w')
#####update
for ProbeSetFreezeId in ProbeSetFreezeIds:
cursor.execute("""
select InbredSetId
from ProbeFreeze, ProbeSetFreeze
where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d
"""%ProbeSetFreezeId);
InbredSetId = cursor.fetchone()[0]
if InbredSetId==3:
InbredSetId=1
#if InbredSetId==12:
# InbredSetId=2
print((ProbeSetFreezeId, InbredSets[InbredSetId]))
genotype_1.read(InbredSets[InbredSetId])
locuses = []
for geno in genotype_1:
for locus in geno:
locuses.append(locus.name)
cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId)
ProbeSetXRefInfos = cursor.fetchall()
kj=0
for aProbeSetXRef in ProbeSetXRefInfos:
ProbeSetId, Locus, DataId = aProbeSetXRef
prgy = genotype_1.prgy
cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId)
results = cursor.fetchall()
if not results:
continue
_strains = []
_values = []
for item2 in results:
strain, value = item2
if strain in prgy:
_strains.append(strain)
_values.append(value)
if not _strains or not _values:
continue
if len(_strains) < 8:
continue
qtlresults = genotype_1.regression(strains = _strains, trait = _values)
_max = max(qtlresults)
_locus = _max.locus.name
_additive = _max.additive
_max = _max.lrs
#output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive))
# _max(LRS) maybe is infinite sometimes, so define it as a very big number
if _max == float('inf'):
_max = 10000
cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s',
(_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId))
kj += 1
if kj%1000==0:
print((ProbeSetFreezeId, InbredSets[InbredSetId], kj))
print(ProbeSetFreezeIds)
@contextlib.contextmanager
def dbconnection(db, user, passwd, host, port):
"""Connect to the database in a contextmanager."""
conn = MySQLdb.Connect(db=db, user=user, passwd=passwd, host=host, port=port)
try:
yield conn
except MySQLdb.Error as _err:
conn.rollback()
finally:
conn.commit()
conn.close()
def read_db_config(path):
with open(path, "r") as infile:
lines = tuple(line.strip() for line in infile.readlines())
return {
"user": lines[0],
"passwd": lines[1],
"host": lines[2],
"port": int(lines[3] or "3306"),
"db": lines[4]
}
def main():
parser = ArgumentParser(prog="QTLReaper", description="Python-2 QTLReaper.")
parser.add_argument("db_config_file",
type=str,
default="dbconfig",
help="File with database connection configuration")
parser.add_argument("genotype_dir",
type=str,
default="/gnshare/gn/web/genotypes/",
help="Directory with the genotype CSV files.")
parser.add_argument("ProbeSetFreezeIds",
nargs="*",
help="The ProbeSetFreezeIds to act on.")
args = parser.parse_args()
with dbconnection(**read_db_config(args.db_config_file)) as conn:
run_qtl_reaper(conn.cursor(), args.genotype_dir, args.ProbeSetFreezeIds)
if __name__ == "__main__":
main()
|