about summary refs log tree commit diff
path: root/scripts/maintenance/readProbeSetMean_v7.py
diff options
context:
space:
mode:
authorBonfaceKilz2020-10-27 01:18:38 +0300
committerGitHub2020-10-27 01:18:38 +0300
commit37c391bc62e9080effcf83c6ff0056ab8841b7fb (patch)
tree1e794c5616c25e82869314a2f4e91f64c4d40ea9 /scripts/maintenance/readProbeSetMean_v7.py
parent85896707ef1f9e214b45298f6b5b1a9dc37bc839 (diff)
parentb369489e6c075eee3f58bb33e493c901b052b0a1 (diff)
downloadgenenetwork2-37c391bc62e9080effcf83c6ff0056ab8841b7fb.tar.gz
Merge pull request #422 from BonfaceKilz/build/python3-migration
Build/python3 migration
Diffstat (limited to 'scripts/maintenance/readProbeSetMean_v7.py')
-rwxr-xr-xscripts/maintenance/readProbeSetMean_v7.py292
1 files changed, 145 insertions, 147 deletions
diff --git a/scripts/maintenance/readProbeSetMean_v7.py b/scripts/maintenance/readProbeSetMean_v7.py
index e9c8f25c..43f084f4 100755
--- a/scripts/maintenance/readProbeSetMean_v7.py
+++ b/scripts/maintenance/readProbeSetMean_v7.py
@@ -9,19 +9,17 @@ import sys
 import MySQLdb
 import getpass
 import time
-#import pdb
 
-#pdb.set_trace()
 
 ########################################################################
 
 def translateAlias(str):
-	if str == "B6":
-		return "C57BL/6J"
-	elif str == "D2":
-		return "DBA/2J"
-	else:
-		return str
+    if str == "B6":
+        return "C57BL/6J"
+    elif str == "D2":
+        return "DBA/2J"
+    else:
+        return str
 
 ########################################################################
 #
@@ -29,23 +27,25 @@ def translateAlias(str):
 #
 ########################################################################
 
+
 dataStart = 1
 
-GeneChipId = int( raw_input("Enter GeneChipId:") )
-ProbeSetFreezeId = int( raw_input("Enter ProbeSetFreezeId:") )
-input_file_name = raw_input("Enter file name with suffix:")
+GeneChipId = int(input("Enter GeneChipId:"))
+ProbeSetFreezeId = int(input("Enter ProbeSetFreezeId:"))
+input_file_name = input("Enter file name with suffix:")
 
 fp = open("%s" % input_file_name, 'rb')
 
 try:
-        passwd = getpass.getpass('Please enter mysql password here : ') 
-        con = MySQLdb.Connect(db='db_webqtl',host='localhost', user='username',passwd=passwd)
+    passwd = getpass.getpass('Please enter mysql password here : ')
+    con = MySQLdb.Connect(db='db_webqtl', host='localhost',
+                          user='username', passwd=passwd)
 
-        db = con.cursor()
-        print "You have successfully connected to mysql.\n"
+    db = con.cursor()
+    print("You have successfully connected to mysql.\n")
 except:
-        print "You entered incorrect password.\n"
-        sys.exit(0)
+    print("You entered incorrect password.\n")
+    sys.exit(0)
 
 time0 = time.time()
 
@@ -55,163 +55,163 @@ time0 = time.time()
 #  generate the gene list of expression data here
 #
 #########################################################################
-print 'Checking if each line have same number of members'
+print('Checking if each line have same number of members')
 
 GeneList = []
 isCont = 1
 header = fp.readline()
-header = string.split(string.strip(header),'\t')
-header = map(string.strip, header)
+header = header.strip().split('\t')
+header = [x.strip() for x in header]
 nfield = len(header)
 line = fp.readline()
 
-kj=0
+kj = 0
 while line:
-	line2 = string.split(string.strip(line),'\t')
-	line2 = map(string.strip, line2)
-	if len(line2) != nfield:
-		print "Error : " + line
-		isCont = 0
+    line2 = line.strip().split('\t')
+    line2 = [x.strip() for x in line2]
+    if len(line2) != nfield:
+        print(("Error : " + line))
+        isCont = 0
 
-	GeneList.append(line2[0])
-	line = fp.readline()
+    GeneList.append(line2[0])
+    line = fp.readline()
 
-	kj+=1
-	if kj%100000 == 0:
-		print 'checked ',kj,' lines'
+    kj += 1
+    if kj % 100000 == 0:
+        print(('checked ', kj, ' lines'))
 
-GeneList = map(string.lower, GeneList)
-GeneList.sort()
-	
-if isCont==0:
-	sys.exit(0)
+GeneList = sorted(map(string.lower, GeneList))
 
+if isCont == 0:
+    sys.exit(0)
 
-print 'used ',time.time()-time0,' seconds'
+
+print(('used ', time.time()-time0, ' seconds'))
 #########################################################################
 #
 #  Check if each strain exist in database
 #  generate the string id list of expression data here
 #
 #########################################################################
-print 'Checking if each strain exist in database'
+print('Checking if each strain exist in database')
 
 isCont = 1
 fp.seek(0)
 header = fp.readline()
-header = string.split(string.strip(header),'\t')
-header = map(string.strip, header)
-header = map(translateAlias, header)
+header = header.strip().split('\t')
+header = [x.strip() for x in header]
+header = list(map(translateAlias, header))
 header = header[dataStart:]
 Ids = []
 for item in header:
-	try:
-		db.execute('select Id from Strain where Name = "%s"' % item)
-		Ids.append(db.fetchall()[0][0])
-	except:
-		print item,'does not exist, check the if the strain name is correct'
-		isCont=0
+    try:
+        db.execute('select Id from Strain where Name = "%s"' % item)
+        Ids.append(db.fetchall()[0][0])
+    except:
+        print((item, 'does not exist, check the if the strain name is correct'))
+        isCont = 0
 
-if isCont==0:
-	sys.exit(0)
+if isCont == 0:
+    sys.exit(0)
 
 
-print 'used ',time.time()-time0,' seconds'
+print(('used ', time.time()-time0, ' seconds'))
 ########################################################################
 #
 # Check if each ProbeSet exist in database
 #
 ########################################################################
-print 'Check if each ProbeSet exist in database'
+print('Check if each ProbeSet exist in database')
 
 ##---- find PID is name or target ----##
 line = fp.readline()
 line = fp.readline()
-line2 = string.split(string.strip(line),'\t')
-line2 = map(string.strip, line2)
+line2 = line.strip().split('\t')
+line2 = [x.strip() for x in line2]
 PId = line2[0]
 
-db.execute('select Id from ProbeSet where Name="%s" and ChipId=%d' % (PId, GeneChipId) )
+db.execute('select Id from ProbeSet where Name="%s" and ChipId=%d' %
+           (PId, GeneChipId))
 results = db.fetchall()
 IdStr = 'TargetId'
-if len(results)>0:
-	IdStr = 'Name'
+if len(results) > 0:
+    IdStr = 'Name'
 
 
 ##---- get Name/TargetId list from database ----##
-db.execute('select distinct(%s) from ProbeSet where ChipId=%d order by %s' % (IdStr, GeneChipId, IdStr))
+db.execute('select distinct(%s) from ProbeSet where ChipId=%d order by %s' % (
+    IdStr, GeneChipId, IdStr))
 results = db.fetchall()
-	
+
 Names = []
 for item in results:
-	Names.append(item[0])
-	
-print Names
+    Names.append(item[0])
 
-Names = map(string.lower, Names)
+print(Names)
 
-Names.sort() # -- Fixed the lower case problem of ProbeSets affx-mur_b2_at  doesn't exist --#
+Names = sorted(map(string.lower, Names))
 
 
 ##---- compare genelist with names ----##
-x=y=0
-x1=-1
-GeneList2=[]
-while x<len(GeneList) and y<len(Names):
-	if GeneList[x]==Names[y]:
-		x += 1
-		y += 1
-	elif GeneList[x]<Names[y]:
-		if x!=x1:
-			GeneList2.append(GeneList[x])
-			x1 = x
-		x += 1
-	elif GeneList[x]>Names[y]:
-		y += 1
-
-	if x%100000==0:
-		print 'check Name, checked %d lines'%x
-
-while x<len(GeneList):
-	GeneList2.append(GeneList[x])
-	x += 1
-
-isCont=1
+x = y = 0
+x1 = -1
+GeneList2 = []
+while x < len(GeneList) and y < len(Names):
+    if GeneList[x] == Names[y]:
+        x += 1
+        y += 1
+    elif GeneList[x] < Names[y]:
+        if x != x1:
+            GeneList2.append(GeneList[x])
+            x1 = x
+        x += 1
+    elif GeneList[x] > Names[y]:
+        y += 1
+
+    if x % 100000 == 0:
+        print(('check Name, checked %d lines' % x))
+
+while x < len(GeneList):
+    GeneList2.append(GeneList[x])
+    x += 1
+
+isCont = 1
 ferror = open("ProbeSetError.txt", "wb")
 for item in GeneList2:
-	ferror.write(item + " doesn't exist \n")
-	print item, " doesn't exist, check if the ProbeSet name is correct"
-	isCont = 0
-        
-if isCont==0:
-	sys.exit(0)
+    ferror.write(item + " doesn't exist \n")
+    print((item, " doesn't exist, check if the ProbeSet name is correct"))
+    isCont = 0
+
+if isCont == 0:
+    sys.exit(0)
 
 
-print 'used ',time.time()-time0,' seconds'
+print(('used ', time.time()-time0, ' seconds'))
 #########################################################################
 #
 # Insert data into database
 #
 #########################################################################
-print 'getting ProbeSet/Id'
+print('getting ProbeSet/Id')
 
 
 #---- get Name/Id map ----#
-db.execute('select %s, Id from ProbeSet where ChipId=%d order by %s' % (IdStr, GeneChipId, IdStr))
+db.execute('select %s, Id from ProbeSet where ChipId=%d order by %s' %
+           (IdStr, GeneChipId, IdStr))
 results = db.fetchall()
 NameIds = {}
 for item in results:
-	NameIds[item[0]] = item[1]
-print 'used ',time.time()-time0,' seconds'
+    NameIds[item[0]] = item[1]
+print(('used ', time.time()-time0, ' seconds'))
 
 
-print 'inserting data'
+print('inserting data')
 
 ##---- get old max dataId ----##
 db.execute('select max(Id) from ProbeSetData')
 maxDataId = int(db.fetchall()[0][0])
 bmax = maxDataId
-print "old_max = %d\n" % bmax
+print(("old_max = %d\n" % bmax))
 
 ##---- insert data ----##
 fp.seek(0)
@@ -222,53 +222,51 @@ kj = 0
 values1 = []
 values2 = []
 while line:
-	line2 = string.split(string.strip(line),'\t')
-	line2 = map(string.strip, line2)
-	PId = line2[0]
-	recordId = NameIds[PId]
-
-	maxDataId += 1
-	datasorig = line2[dataStart:]
-	
-	###### Data Table items ######
-	i=0
-	for item in datasorig:
-		try:
-			values1.append('(%d,%d,%s)' % (maxDataId, Ids[i], float(item)))
-      		except:
-			pass
-		i += 1
-
-	values2.append("(%d,%d,%d)" % (ProbeSetFreezeId, recordId, maxDataId))
-
-
-	##---- insert into table ----##
-	kj += 1
-	if kj % 100 == 0:
-		cmd = ','.join(values1)
-		cmd = 'insert into ProbeSetData values %s' % cmd
-		db.execute(cmd)
-
-		cmd = ','.join(values2)
-		cmd = 'insert into ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) values %s' % cmd
-		db.execute(cmd)
-
-		values1=[]
-		values2=[]
-		print 'Inserted ', kj,' lines'
-		print 'used ',time.time()-time0,' seconds'
-	
-	line = fp.readline()
-
-
-
-if len(values1)>0:
-	cmd = ','.join(values1)
-	cmd = 'insert into ProbeSetData values %s' % cmd
-	db.execute(cmd)
-
-	cmd = ','.join(values2)
-	cmd = 'insert into ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) values %s' % cmd
-	db.execute(cmd)
+    line2 = line.strip().split('\t')
+    line2 = [x.strip() for x in line2]
+    PId = line2[0]
+    recordId = NameIds[PId]
+
+    maxDataId += 1
+    datasorig = line2[dataStart:]
+
+    ###### Data Table items ######
+    i = 0
+    for item in datasorig:
+        try:
+            values1.append('(%d,%d,%s)' % (maxDataId, Ids[i], float(item)))
+        except:
+            pass
+        i += 1
+
+    values2.append("(%d,%d,%d)" % (ProbeSetFreezeId, recordId, maxDataId))
+
+    ##---- insert into table ----##
+    kj += 1
+    if kj % 100 == 0:
+        cmd = ','.join(values1)
+        cmd = 'insert into ProbeSetData values %s' % cmd
+        db.execute(cmd)
+
+        cmd = ','.join(values2)
+        cmd = 'insert into ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) values %s' % cmd
+        db.execute(cmd)
+
+        values1 = []
+        values2 = []
+        print(('Inserted ', kj, ' lines'))
+        print(('used ', time.time()-time0, ' seconds'))
+
+    line = fp.readline()
+
+
+if len(values1) > 0:
+    cmd = ','.join(values1)
+    cmd = 'insert into ProbeSetData values %s' % cmd
+    db.execute(cmd)
+
+    cmd = ','.join(values2)
+    cmd = 'insert into ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) values %s' % cmd
+    db.execute(cmd)
 
 con.close()