Change indentation, wrap code in function and create entry-point.

author: Frederick Muriuki Muriithi 2024-10-29 12:59:54 -0500
committer: Frederick Muriuki Muriithi 2024-10-29 12:59:54 -0500
commit: d2219521f39cad50655384d079573e1428f3c0ff (patch)
tree: 04139b084bd37c9bb6a62f694f7815788d9c558d /scripts
parent: d28391bc9d1ad806765462c73b3c6aa37f74d519 (diff)
download: genenetwork2-d2219521f39cad50655384d079573e1428f3c0ff.tar.gz
1 files changed, 105 insertions, 102 deletions
diff --git a/scripts/maintenance/QTL_Reaper_v6.py b/scripts/maintenance/QTL_Reaper_v6.py
index 20fd8e3b..cfbf31bb 100755
--- a/scripts/maintenance/QTL_Reaper_v6.py
+++ b/scripts/maintenance/QTL_Reaper_v6.py
@@ -7,105 +7,108 @@ import reaper
 import MySQLdb
 import time
 
-con = MySQLdb.Connect(db='db_webqtl', user='username', passwd='', host="localhost")
-cursor = con.cursor()
-
-genotypeDir = '/gnshare/gn/web/genotypes/'
-genotype_1 = reaper.Dataset()
-
-#####get all of the genotypes
-cursor.execute('select Id, Name from InbredSet')
-results = cursor.fetchall()
-InbredSets = {}
-for item in results:
-	InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno'
-
-ProbeSetFreezeIds=sys.argv[1:]
-if ProbeSetFreezeIds:
-	#####convert the Ids to integer
-	ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds))
-
-else:
-	#####get all of the dataset that need be updated
-	cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc')
-	results = cursor.fetchall()
-	ProbeSetFreezeIds = []
-	for item in results:
-		ProbeSetFreezeIds.append(item[0])
-
-####human dataset can NOT use this program calculate the LRS, ignore it
-if ProbeSetFreezeIds.__contains__(215):
-	ProbeSetFreezeIds.remove(215)
-
-#output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w')
-
-#####update 
-for ProbeSetFreezeId in ProbeSetFreezeIds:
-	cursor.execute("""
-		select InbredSetId 
-		from ProbeFreeze, ProbeSetFreeze 
-		where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d
-	"""%ProbeSetFreezeId);
-
-	InbredSetId = cursor.fetchone()[0]
-	if InbredSetId==3:
-		InbredSetId=1
-	#if InbredSetId==12:
-	#	InbredSetId=2
-
-	print((ProbeSetFreezeId, InbredSets[InbredSetId]))
-
-	genotype_1.read(InbredSets[InbredSetId])
-	locuses = []
-	for geno in genotype_1:
-		for locus in geno:
-			locuses.append(locus.name)
-
-	cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId)
-	ProbeSetXRefInfos = cursor.fetchall()
-
-	kj=0
-	for aProbeSetXRef in ProbeSetXRefInfos:
-		ProbeSetId, Locus, DataId = aProbeSetXRef
-		prgy = genotype_1.prgy
-
-		cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId)
-		results = cursor.fetchall()
-		if not results:
-			continue
-		_strains = []
-		_values = []
-		for item2 in results:
-			strain, value = item2
-			if strain in prgy:
-				_strains.append(strain)
-				_values.append(value)
-		if not _strains or not _values:
-			continue
-		
-		if len(_strains) < 8:
-			continue
-		qtlresults = genotype_1.regression(strains = _strains, trait = _values)
-		_max = max(qtlresults)
-		_locus = _max.locus.name
-		_additive = _max.additive
-		_max = _max.lrs
-
-		#output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive))
-		
-		# _max(LRS) maybe is infinite sometimes, so define it as a very big number
-		if _max == float('inf'):
-			_max = 10000
-
-		cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s', \
-				(_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId))
-
-		kj += 1
-		if kj%1000==0:
-			print((ProbeSetFreezeId, InbredSets[InbredSetId], kj))
-
-
-	print(ProbeSetFreezeIds)
-
-cursor.close()
-con.close()
+
+def run_qtl_reaper(cursor, genotypeDir):
+    genotype_1 = reaper.Dataset()
+    #####get all of the genotypes
+    cursor.execute('select Id, Name from InbredSet')
+    results = cursor.fetchall()
+    InbredSets = {}
+    for item in results:
+            InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno'
+
+    ProbeSetFreezeIds=sys.argv[1:]
+    if ProbeSetFreezeIds:
+        #####convert the Ids to integer
+        ProbeSetFreezeIds=list(map(int, ProbeSetFreezeIds))
+    else:
+        #####get all of the dataset that need be updated
+        cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc')
+        results = cursor.fetchall()
+        ProbeSetFreezeIds = []
+        for item in results:
+            ProbeSetFreezeIds.append(item[0])
+
+    ####human dataset can NOT use this program calculate the LRS, ignore it
+    if ProbeSetFreezeIds.__contains__(215):
+        ProbeSetFreezeIds.remove(215)
+
+    #output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w')
+
+    #####update
+    for ProbeSetFreezeId in ProbeSetFreezeIds:
+        cursor.execute("""
+            select InbredSetId
+            from ProbeFreeze, ProbeSetFreeze
+            where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d
+        """%ProbeSetFreezeId);
+
+        InbredSetId = cursor.fetchone()[0]
+        if InbredSetId==3:
+            InbredSetId=1
+        #if InbredSetId==12:
+        #	InbredSetId=2
+
+        print((ProbeSetFreezeId, InbredSets[InbredSetId]))
+
+        genotype_1.read(InbredSets[InbredSetId])
+        locuses = []
+        for geno in genotype_1:
+            for locus in geno:
+                locuses.append(locus.name)
+
+        cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId)
+        ProbeSetXRefInfos = cursor.fetchall()
+
+        kj=0
+        for aProbeSetXRef in ProbeSetXRefInfos:
+            ProbeSetId, Locus, DataId = aProbeSetXRef
+            prgy = genotype_1.prgy
+
+            cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId)
+            results = cursor.fetchall()
+            if not results:
+                continue
+            _strains = []
+            _values = []
+            for item2 in results:
+                strain, value = item2
+                if strain in prgy:
+                    _strains.append(strain)
+                    _values.append(value)
+            if not _strains or not _values:
+                continue
+
+            if len(_strains) < 8:
+                continue
+            qtlresults = genotype_1.regression(strains = _strains, trait = _values)
+            _max = max(qtlresults)
+            _locus = _max.locus.name
+            _additive = _max.additive
+            _max = _max.lrs
+
+            #output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive))
+
+            # _max(LRS) maybe is infinite sometimes, so define it as a very big number
+            if _max == float('inf'):
+                _max = 10000
+
+            cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s',
+                           (_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId))
+
+            kj += 1
+            if kj%1000==0:
+                print((ProbeSetFreezeId, InbredSets[InbredSetId], kj))
+
+
+        print(ProbeSetFreezeIds)
+
+    cursor.close()
+
+def main():
+    con = MySQLdb.Connect(db='db_webqtl', user='username', passwd='', host="localhost")
+    run_qtl_reaper(con.cursor(), "/gnshare/gn/web/genotypes/")
+    con.close()
+
+if __name__ == "__main__":
+    main()
author	Frederick Muriuki Muriithi	2024-10-29 12:59:54 -0500
committer	Frederick Muriuki Muriithi	2024-10-29 12:59:54 -0500
commit	d2219521f39cad50655384d079573e1428f3c0ff (patch)
tree	04139b084bd37c9bb6a62f694f7815788d9c558d /scripts
parent	d28391bc9d1ad806765462c73b3c6aa37f74d519 (diff)
download	genenetwork2-d2219521f39cad50655384d079573e1428f3c0ff.tar.gz