aboutsummaryrefslogtreecommitdiff
path: root/scripts/maintenance/readProbeSetMean_v7.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/maintenance/readProbeSetMean_v7.py')
-rwxr-xr-x scripts/maintenance/readProbeSetMean_v7.py 76
1 files changed, 37 insertions, 39 deletions
diff --git a/scripts/maintenance/readProbeSetMean_v7.py b/scripts/maintenance/readProbeSetMean_v7.py
index e9c8f25c..59a51cf9 100755
--- a/scripts/maintenance/readProbeSetMean_v7.py
+++ b/scripts/maintenance/readProbeSetMean_v7.py
@@ -31,20 +31,20 @@ def translateAlias(str):
dataStart = 1
-GeneChipId = int( raw_input("Enter GeneChipId:") )
-ProbeSetFreezeId = int( raw_input("Enter ProbeSetFreezeId:") )
-input_file_name = raw_input("Enter file name with suffix:")
+GeneChipId = int( input("Enter GeneChipId:") )
+ProbeSetFreezeId = int( input("Enter ProbeSetFreezeId:") )
+input_file_name = input("Enter file name with suffix:")
fp = open("%s" % input_file_name, 'rb')
try:
passwd = getpass.getpass('Please enter mysql password here : ')
- con = MySQLdb.Connect(db='db_webqtl',host='localhost', user='username',passwd=passwd)
+ con = MySQLdb.Connect(db='db_webqtl', host='localhost', user='username', passwd=passwd)
db = con.cursor()
- print "You have successfully connected to mysql.\n"
+ print("You have successfully connected to mysql.\n")
except:
- print "You entered incorrect password.\n"
+ print("You entered incorrect password.\n")
sys.exit(0)
time0 = time.time()
@@ -55,22 +55,22 @@ time0 = time.time()
# generate the gene list of expression data here
#
#########################################################################
-print 'Checking if each line have same number of members'
+print('Checking if each line have same number of members')
GeneList = []
isCont = 1
header = fp.readline()
-header = string.split(string.strip(header),'\t')
-header = map(string.strip, header)
+header = header.decode('utf-8').strip().split('\t')  # fp is opened in 'rb' mode: decode bytes before text processing
+header = [field.strip() for field in header]
nfield = len(header)
line = fp.readline()
kj=0
while line:
- line2 = string.split(string.strip(line),'\t')
- line2 = map(string.strip, line2)
+ line2 = line.decode('utf-8').strip().split('\t')  # fp is opened in 'rb' mode: decode bytes before text processing
+ line2 = [field.strip() for field in line2]
if len(line2) != nfield:
- print "Error : " + line
+ print("Error : " + line.decode('utf-8'))  # line is bytes (fp opened 'rb'); str + bytes would raise TypeError
isCont = 0
GeneList.append(line2[0])
@@ -78,30 +78,29 @@ while line:
kj+=1
if kj%100000 == 0:
- print 'checked ',kj,' lines'
+ print('checked ', kj, ' lines')
-GeneList = map(string.lower, GeneList)
-GeneList.sort()
+GeneList = sorted(gene.lower() for gene in GeneList)
if isCont==0:
sys.exit(0)
-print 'used ',time.time()-time0,' seconds'
+print('used ', time.time()-time0, ' seconds')
#########################################################################
#
# Check if each strain exist in database
# generate the string id list of expression data here
#
#########################################################################
-print 'Checking if each strain exist in database'
+print('Checking if each strain exist in database')
isCont = 1
fp.seek(0)
header = fp.readline()
-header = string.split(string.strip(header),'\t')
-header = map(string.strip, header)
-header = map(translateAlias, header)
+header = header.decode('utf-8').strip().split('\t')  # fp is opened in 'rb' mode: decode bytes before text processing
+header = [field.strip() for field in header]
+header = list(map(translateAlias, header))
header = header[dataStart:]
Ids = []
for item in header:
@@ -109,26 +108,26 @@ for item in header:
db.execute('select Id from Strain where Name = "%s"' % item)
Ids.append(db.fetchall()[0][0])
except:
- print item,'does not exist, check the if the strain name is correct'
+ print(item, 'does not exist, check the if the strain name is correct')
isCont=0
if isCont==0:
sys.exit(0)
-print 'used ',time.time()-time0,' seconds'
+print('used ', time.time()-time0, ' seconds')
########################################################################
#
# Check if each ProbeSet exist in database
#
########################################################################
-print 'Check if each ProbeSet exist in database'
+print('Check if each ProbeSet exist in database')
##---- find PID is name or target ----##
line = fp.readline()
line = fp.readline()
-line2 = string.split(string.strip(line),'\t')
-line2 = map(string.strip, line2)
+line2 = line.decode('utf-8').strip().split('\t')  # fp is opened in 'rb' mode: decode bytes before text processing
+line2 = [field.strip() for field in line2]
PId = line2[0]
db.execute('select Id from ProbeSet where Name="%s" and ChipId=%d' % (PId, GeneChipId) )
@@ -146,11 +145,10 @@ Names = []
for item in results:
Names.append(item[0])
-print Names
+print(Names)
-Names = map(string.lower, Names)
+Names = sorted(name.lower() for name in Names)
-Names.sort() # -- Fixed the lower case problem of ProbeSets affx-mur_b2_at doesn't exist --#
##---- compare genelist with names ----##
@@ -170,7 +168,7 @@ while x<len(GeneList) and y<len(Names):
y += 1
if x%100000==0:
- print 'check Name, checked %d lines'%x
+ print(('check Name, checked %d lines'%x))
while x<len(GeneList):
GeneList2.append(GeneList[x])
@@ -180,20 +178,20 @@ isCont=1
ferror = open("ProbeSetError.txt", "wb")
for item in GeneList2:
ferror.write(item + " doesn't exist \n")
- print item, " doesn't exist, check if the ProbeSet name is correct"
+ print(item, " doesn't exist, check if the ProbeSet name is correct")
isCont = 0
if isCont==0:
sys.exit(0)
-print 'used ',time.time()-time0,' seconds'
+print('used ', time.time()-time0, ' seconds')
#########################################################################
#
# Insert data into database
#
#########################################################################
-print 'getting ProbeSet/Id'
+print('getting ProbeSet/Id')
#---- get Name/Id map ----#
@@ -202,16 +200,16 @@ results = db.fetchall()
NameIds = {}
for item in results:
NameIds[item[0]] = item[1]
-print 'used ',time.time()-time0,' seconds'
+print('used ', time.time()-time0, ' seconds')
-print 'inserting data'
+print('inserting data')
##---- get old max dataId ----##
db.execute('select max(Id) from ProbeSetData')
maxDataId = int(db.fetchall()[0][0])
bmax = maxDataId
-print "old_max = %d\n" % bmax
+print(("old_max = %d\n" % bmax))
##---- insert data ----##
fp.seek(0)
@@ -222,8 +220,8 @@ kj = 0
values1 = []
values2 = []
while line:
- line2 = string.split(string.strip(line),'\t')
- line2 = map(string.strip, line2)
+ line2 = line.decode('utf-8').strip().split('\t')  # fp is opened in 'rb' mode: decode bytes before text processing
+ line2 = [field.strip() for field in line2]
PId = line2[0]
recordId = NameIds[PId]
@@ -255,8 +253,8 @@ while line:
values1=[]
values2=[]
- print 'Inserted ', kj,' lines'
- print 'used ',time.time()-time0,' seconds'
+ print('Inserted ', kj, ' lines')
+ print('used ', time.time()-time0, ' seconds')
line = fp.readline()