scripts/maintenance/QTL_Reaper_v6.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114

####### To run this program do: python QTL_Reaper_v2.py 235
#Where 163 is the ProbeSetFreeze Id of the database that we want to calculate #the LRS
#!/usr/bin/python
import sys
import os
import reaper
import MySQLdb
import time


def run_qtl_reaper(cursor, genotypeDir):
    genotype_1 = reaper.Dataset()
    #####get all of the genotypes
    cursor.execute('select Id, Name from InbredSet')
    results = cursor.fetchall()
    InbredSets = {}
    for item in results:
            InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno'

    ProbeSetFreezeIds=sys.argv[1:]
    if ProbeSetFreezeIds:
        #####convert the Ids to integer
        ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds))
    else:
        #####get all of the dataset that need be updated
        cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc')
        results = cursor.fetchall()
        ProbeSetFreezeIds = []
        for item in results:
            ProbeSetFreezeIds.append(item[0])

    ####human dataset can NOT use this program calculate the LRS, ignore it
    if ProbeSetFreezeIds.__contains__(215):
        ProbeSetFreezeIds.remove(215)

    #output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w')

    #####update
    for ProbeSetFreezeId in ProbeSetFreezeIds:
        cursor.execute("""
            select InbredSetId
            from ProbeFreeze, ProbeSetFreeze
            where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d
        """%ProbeSetFreezeId);

        InbredSetId = cursor.fetchone()[0]
        if InbredSetId==3:
            InbredSetId=1
        #if InbredSetId==12:
        #	InbredSetId=2

        print((ProbeSetFreezeId, InbredSets[InbredSetId]))

        genotype_1.read(InbredSets[InbredSetId])
        locuses = []
        for geno in genotype_1:
            for locus in geno:
                locuses.append(locus.name)

        cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId)
        ProbeSetXRefInfos = cursor.fetchall()

        kj=0
        for aProbeSetXRef in ProbeSetXRefInfos:
            ProbeSetId, Locus, DataId = aProbeSetXRef
            prgy = genotype_1.prgy

            cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId)
            results = cursor.fetchall()
            if not results:
                continue
            _strains = []
            _values = []
            for item2 in results:
                strain, value = item2
                if strain in prgy:
                    _strains.append(strain)
                    _values.append(value)
            if not _strains or not _values:
                continue

            if len(_strains) < 8:
                continue
            qtlresults = genotype_1.regression(strains = _strains, trait = _values)
            _max = max(qtlresults)
            _locus = _max.locus.name
            _additive = _max.additive
            _max = _max.lrs

            #output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive))

            # _max(LRS) maybe is infinite sometimes, so define it as a very big number
            if _max == float('inf'):
                _max = 10000

            cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s',
                           (_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId))

            kj += 1
            if kj%1000==0:
                print((ProbeSetFreezeId, InbredSets[InbredSetId], kj))


        print(ProbeSetFreezeIds)

    cursor.close()

def main():
    con = MySQLdb.Connect(db='db_webqtl', user='username', passwd='', host="localhost")
    run_qtl_reaper(con.cursor(), "/gnshare/gn/web/genotypes/")
    con.close()

if __name__ == "__main__":
    main()