From d2219521f39cad50655384d079573e1428f3c0ff Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 29 Oct 2024 12:59:54 -0500 Subject: Change indentation, wrap code in function and create entry-point. --- scripts/maintenance/QTL_Reaper_v6.py | 207 ++++++++++++++++++----------------- 1 file changed, 105 insertions(+), 102 deletions(-) (limited to 'scripts/maintenance') diff --git a/scripts/maintenance/QTL_Reaper_v6.py b/scripts/maintenance/QTL_Reaper_v6.py index 20fd8e3b..cfbf31bb 100755 --- a/scripts/maintenance/QTL_Reaper_v6.py +++ b/scripts/maintenance/QTL_Reaper_v6.py @@ -7,105 +7,108 @@ import reaper import MySQLdb import time -con = MySQLdb.Connect(db='db_webqtl', user='username', passwd='', host="localhost") -cursor = con.cursor() - -genotypeDir = '/gnshare/gn/web/genotypes/' -genotype_1 = reaper.Dataset() - -#####get all of the genotypes -cursor.execute('select Id, Name from InbredSet') -results = cursor.fetchall() -InbredSets = {} -for item in results: - InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno' - -ProbeSetFreezeIds=sys.argv[1:] -if ProbeSetFreezeIds: - #####convert the Ids to integer - ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds)) - -else: - #####get all of the dataset that need be updated - cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc') - results = cursor.fetchall() - ProbeSetFreezeIds = [] - for item in results: - ProbeSetFreezeIds.append(item[0]) - -####human dataset can NOT use this program calculate the LRS, ignore it -if ProbeSetFreezeIds.__contains__(215): - ProbeSetFreezeIds.remove(215) - -#output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w') - -#####update -for ProbeSetFreezeId in ProbeSetFreezeIds: - cursor.execute(""" - select InbredSetId - from ProbeFreeze, ProbeSetFreeze - where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d - """%ProbeSetFreezeId); - - InbredSetId = cursor.fetchone()[0] - if InbredSetId==3: - InbredSetId=1 - #if InbredSetId==12: - # InbredSetId=2 - - print((ProbeSetFreezeId, InbredSets[InbredSetId])) - - genotype_1.read(InbredSets[InbredSetId]) - locuses = [] - for geno in genotype_1: - for locus in geno: - locuses.append(locus.name) - - cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId) - ProbeSetXRefInfos = cursor.fetchall() - - kj=0 - for aProbeSetXRef in ProbeSetXRefInfos: - ProbeSetId, Locus, DataId = aProbeSetXRef - prgy = genotype_1.prgy - - cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId) - results = cursor.fetchall() - if not results: - continue - _strains = [] - _values = [] - for item2 in results: - strain, value = item2 - if strain in prgy: - _strains.append(strain) - _values.append(value) - if not _strains or not _values: - continue - - if len(_strains) < 8: - continue - qtlresults = genotype_1.regression(strains = _strains, trait = _values) - _max = max(qtlresults) - _locus = _max.locus.name - _additive = _max.additive - _max = _max.lrs - - #output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive)) - - # _max(LRS) maybe is infinite sometimes, so define it as a very big number - if _max == float('inf'): - _max = 10000 - - cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s', \ - (_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId)) - - kj += 1 - if kj%1000==0: - print((ProbeSetFreezeId, InbredSets[InbredSetId], kj)) - - - print(ProbeSetFreezeIds) - -cursor.close() -con.close() + +def run_qtl_reaper(cursor, genotypeDir): + genotype_1 = reaper.Dataset() + #####get all of the genotypes + cursor.execute('select Id, Name from InbredSet') + results = cursor.fetchall() + InbredSets = {} + for item in results: + InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno' + + ProbeSetFreezeIds=sys.argv[1:] + if ProbeSetFreezeIds: + #####convert the Ids to integer + ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds)) + else: + #####get all of the dataset that need be updated + cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc') + results = cursor.fetchall() + ProbeSetFreezeIds = [] + for item in results: + ProbeSetFreezeIds.append(item[0]) + + ####human dataset can NOT use this program calculate the LRS, ignore it + if ProbeSetFreezeIds.__contains__(215): + ProbeSetFreezeIds.remove(215) + + #output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w') + + #####update + for ProbeSetFreezeId in ProbeSetFreezeIds: + cursor.execute(""" + select InbredSetId + from ProbeFreeze, ProbeSetFreeze + where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d + """%ProbeSetFreezeId); + + InbredSetId = cursor.fetchone()[0] + if InbredSetId==3: + InbredSetId=1 + #if InbredSetId==12: + # InbredSetId=2 + + print((ProbeSetFreezeId, InbredSets[InbredSetId])) + + genotype_1.read(InbredSets[InbredSetId]) + locuses = [] + for geno in genotype_1: + for locus in geno: + locuses.append(locus.name) + + cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId) + ProbeSetXRefInfos = cursor.fetchall() + + kj=0 + for aProbeSetXRef in ProbeSetXRefInfos: + ProbeSetId, Locus, DataId = aProbeSetXRef + prgy = genotype_1.prgy + + cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId) + results = cursor.fetchall() + if not results: + continue + _strains = [] + _values = [] + for item2 in results: + strain, value = item2 + if strain in prgy: + _strains.append(strain) + _values.append(value) + if not _strains or not _values: + continue + + if len(_strains) < 8: + continue + qtlresults = genotype_1.regression(strains = _strains, trait = _values) + _max = max(qtlresults) + _locus = _max.locus.name + _additive = _max.additive + _max = _max.lrs + + #output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive)) + + # _max(LRS) maybe is infinite sometimes, so define it as a very big number + if _max == float('inf'): + _max = 10000 + + cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s', + (_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId)) + + kj += 1 + if kj%1000==0: + print((ProbeSetFreezeId, InbredSets[InbredSetId], kj)) + + + print(ProbeSetFreezeIds) + + cursor.close() + +def main(): + con = MySQLdb.Connect(db='db_webqtl', user='username', passwd='', host="localhost") + run_qtl_reaper(con.cursor(), "/gnshare/gn/web/genotypes/") + con.close() + +if __name__ == "__main__": + main() -- cgit v1.2.3