aboutsummaryrefslogtreecommitdiff
####### To run this program do: python QTL_Reaper_v2.py 235
#Where 163 is the ProbeSetFreeze Id of the database that we want to calculate #the LRS
#!/usr/bin/python
import reaper
import MySQLdb
import contextlib
from argparse import ArgumentParser


def run_qtl_reaper(cursor, genotypeDir, ProbeSetFreezeIds):
    genotype_1 = reaper.Dataset()
    #####get all of the genotypes
    cursor.execute('select Id, Name from InbredSet')
    results = cursor.fetchall()
    InbredSets = {}
    for item in results:
            InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno'

    if ProbeSetFreezeIds:
        #####convert the Ids to integer
        ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds))
    else:
        #####get all of the dataset that need be updated
        cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc')
        results = cursor.fetchall()
        ProbeSetFreezeIds = []
        for item in results:
            ProbeSetFreezeIds.append(item[0])

    ####human dataset can NOT use this program calculate the LRS, ignore it
    if ProbeSetFreezeIds.__contains__(215):
        ProbeSetFreezeIds.remove(215)

    #output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w')

    #####update
    for ProbeSetFreezeId in ProbeSetFreezeIds:
        cursor.execute("""
            select InbredSetId
            from ProbeFreeze, ProbeSetFreeze
            where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d
        """%ProbeSetFreezeId);

        InbredSetId = cursor.fetchone()[0]
        if InbredSetId==3:
            InbredSetId=1
        #if InbredSetId==12:
        #	InbredSetId=2

        print((ProbeSetFreezeId, InbredSets[InbredSetId]))

        genotype_1.read(InbredSets[InbredSetId])
        locuses = []
        for geno in genotype_1:
            for locus in geno:
                locuses.append(locus.name)

        cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId)
        ProbeSetXRefInfos = cursor.fetchall()

        kj=0
        for aProbeSetXRef in ProbeSetXRefInfos:
            ProbeSetId, Locus, DataId = aProbeSetXRef
            prgy = genotype_1.prgy

            cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId)
            results = cursor.fetchall()
            if not results:
                continue
            _strains = []
            _values = []
            for item2 in results:
                strain, value = item2
                if strain in prgy:
                    _strains.append(strain)
                    _values.append(value)
            if not _strains or not _values:
                continue

            if len(_strains) < 8:
                continue
            qtlresults = genotype_1.regression(strains = _strains, trait = _values)
            _max = max(qtlresults)
            _locus = _max.locus.name
            _additive = _max.additive
            _max = _max.lrs

            #output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive))

            # _max(LRS) maybe is infinite sometimes, so define it as a very big number
            if _max == float('inf'):
                _max = 10000

            cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s',
                           (_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId))

            kj += 1
            if kj%1000==0:
                print((ProbeSetFreezeId, InbredSets[InbredSetId], kj))


        print(ProbeSetFreezeIds)


@contextlib.contextmanager
def dbconnection(db, user, passwd, host, port):
    """Connect to the database in a contextmanager."""
    conn = MySQLdb.Connect(db=db, user=user, passwd=passwd, host=host, port=port)
    try:
        yield conn
    except MySQLdb.Error as _err:
        conn.rollback()
    finally:
        conn.commit()
        conn.close()


def read_db_config(path):
    with open(path, "r") as infile:
        lines = tuple(line.strip() for line in infile.readlines())
        return {
            "user": lines[0],
            "passwd": lines[1],
            "host": lines[2],
            "port": int(lines[3] or "3306"),
            "db": lines[4]
        }


def main():
    parser = ArgumentParser(prog="QTLReaper", description="Python-2 QTLReaper.")
    parser.add_argument("db_config_file",
                        type=str,
                        default="dbconfig",
                        help="File with database connection configuration")
    parser.add_argument("genotype_dir",
                        type=str,
                        default="/gnshare/gn/web/genotypes/",
                        help="Directory with the genotype CSV files.")
    parser.add_argument("ProbeSetFreezeIds",
                        nargs="*",
                        help="The ProbeSetFreezeIds to act on.")
    args = parser.parse_args()
    with dbconnection(**read_db_config(args.db_config_file)) as conn:
        run_qtl_reaper(conn.cursor(), args.genotype_dir, args.ProbeSetFreezeIds)

if __name__ == "__main__":
    main()