about summary refs log tree commit diff
path: root/scripts/maintenance/QTL_Reaper_v6.py
diff options
context:
space:
mode:
authorPjotr Prins2018-04-03 07:50:28 +0000
committerPjotr Prins2018-04-03 07:50:28 +0000
commitfb57f05083b0512b7bb9f9e15b6cc6efaded5a1f (patch)
treeb2d5e7cf8f9a52a44b719103809276315de7f325 /scripts/maintenance/QTL_Reaper_v6.py
parent9544ca2e6461b267c28d9a4c7fe976bc34f10be3 (diff)
downloadgenenetwork2-fb57f05083b0512b7bb9f9e15b6cc6efaded5a1f.tar.gz
@acenteno added data upload scripts into main repo
Diffstat (limited to 'scripts/maintenance/QTL_Reaper_v6.py')
-rwxr-xr-xscripts/maintenance/QTL_Reaper_v6.py108
1 files changed, 108 insertions, 0 deletions
diff --git a/scripts/maintenance/QTL_Reaper_v6.py b/scripts/maintenance/QTL_Reaper_v6.py
new file mode 100755
index 00000000..e50dbd40
--- /dev/null
+++ b/scripts/maintenance/QTL_Reaper_v6.py
@@ -0,0 +1,108 @@
+####### To run this program do: python QTL_Reaper_v2.py 235
+#Where 163 is the ProbeSetFreeze Id of the database that we want to calculate #the LRS
+#!/usr/bin/python
+import sys
+import os
+import reaper
+import MySQLdb
+import time
+
+con = MySQLdb.Connect(db='db_webqtl',user='username',passwd='', host="localhost")
+cursor = con.cursor()
+
+genotypeDir = '/gnshare/gn/web/genotypes/'
+genotype_1 = reaper.Dataset()
+
+#####get all of the genotypes
+cursor.execute('select Id, Name from InbredSet')
+results = cursor.fetchall()
+InbredSets = {}
+for item in results:
+	InbredSets[item[0]] = genotypeDir+str(item[1])+'.geno'
+
+ProbeSetFreezeIds=sys.argv[1:]
+if ProbeSetFreezeIds:
+	#####convert the Ids to integer
+	ProbeSetFreezeIds=map(int, ProbeSetFreezeIds)
+
+else:
+	#####get all of the dataset that need be updated
+	cursor.execute('select distinct(ProbeSetFreezeId) from ProbeSetXRef where pValue is NULL order by ProbeSetFreezeId desc')
+	results = cursor.fetchall()
+	ProbeSetFreezeIds = []
+	for item in results:
+		ProbeSetFreezeIds.append(item[0])
+
+####human dataset can NOT use this program calculate the LRS, ignore it
+if ProbeSetFreezeIds.__contains__(215):
+	ProbeSetFreezeIds.remove(215)
+
+#output_file = open ('/home/xzhou/work/DatabaseTools/cal_LRS_Additive_result.txt', 'w')
+
+#####update 
+for ProbeSetFreezeId in ProbeSetFreezeIds:
+	cursor.execute("""
+		select InbredSetId 
+		from ProbeFreeze, ProbeSetFreeze 
+		where ProbeFreeze.Id=ProbeSetFreeze.ProbeFreezeId and ProbeSetFreeze.Id=%d
+	"""%ProbeSetFreezeId);
+
+	InbredSetId = cursor.fetchone()[0]
+	if InbredSetId==3:
+		InbredSetId=1
+	#if InbredSetId==12:
+	#	InbredSetId=2
+
+	print ProbeSetFreezeId, InbredSets[InbredSetId]
+
+	genotype_1.read(InbredSets[InbredSetId])
+	locuses = []
+	for geno in genotype_1:
+		for locus in geno:
+			locuses.append(locus.name)
+
+	cursor.execute('select ProbeSetId, Locus, DataId from ProbeSetXRef where ProbeSetFreezeId=%s'%ProbeSetFreezeId)
+	ProbeSetXRefInfos = cursor.fetchall()
+
+	kj=0
+	for aProbeSetXRef in ProbeSetXRefInfos:
+		ProbeSetId, Locus, DataId = aProbeSetXRef
+		prgy = genotype_1.prgy
+
+		cursor.execute("select Strain.Name, ProbeSetData.value from Strain, ProbeSetData where Strain.Id = ProbeSetData.StrainId and ProbeSetData.Id = %d" % DataId)
+		results = cursor.fetchall()
+		if not results:
+			continue
+		_strains = []
+		_values = []
+		for item2 in results:
+			strain, value = item2
+			if strain in prgy:
+				_strains.append(strain)
+				_values.append(value)
+		if not _strains or not _values:
+			continue
+		
+		if len(_strains) < 8:
+			continue
+		qtlresults = genotype_1.regression(strains = _strains, trait = _values)
+		_max = max(qtlresults)
+		_locus = _max.locus.name
+		_additive = _max.additive
+		_max = _max.lrs
+
+		#output_file.write('%s\t%s\t%s\t%s\t%s\n' % (ProbeSetFreezeId, ProbeSetId, _locus, _max, _additive))
+		
+		# _max(LRS) maybe is infinite sometimes, so define it as a very big number
+		if _max == float('inf'):
+			_max = 10000
+
+		cursor.execute('update ProbeSetXRef set Locus=%s, LRS=%s, additive=%s where ProbeSetId=%s and ProbeSetFreezeId=%s', \
+				(_locus, _max, _additive, ProbeSetId, ProbeSetFreezeId))
+
+		kj += 1
+		if kj%1000==0:
+			print ProbeSetFreezeId, InbredSets[InbredSetId],kj
+
+
+	print ProbeSetFreezeIds