diff options
Diffstat (limited to 'scripts/maintenance/readProbeSetMean_v7.py')
-rwxr-xr-x | scripts/maintenance/readProbeSetMean_v7.py | 76 |
1 files changed, 37 insertions, 39 deletions
diff --git a/scripts/maintenance/readProbeSetMean_v7.py b/scripts/maintenance/readProbeSetMean_v7.py
index e9c8f25c..59a51cf9 100755
--- a/scripts/maintenance/readProbeSetMean_v7.py
+++ b/scripts/maintenance/readProbeSetMean_v7.py
@@ -31,20 +31,20 @@ def translateAlias(str):
 
 dataStart = 1
 
-GeneChipId = int( raw_input("Enter GeneChipId:") )
-ProbeSetFreezeId = int( raw_input("Enter ProbeSetFreezeId:") )
-input_file_name = raw_input("Enter file name with suffix:")
+GeneChipId = int( input("Enter GeneChipId:") )
+ProbeSetFreezeId = int( input("Enter ProbeSetFreezeId:") )
+input_file_name = input("Enter file name with suffix:")
 
 fp = open("%s" % input_file_name, 'rb')
 
 
 try:
     passwd = getpass.getpass('Please enter mysql password here : ')
-    con = MySQLdb.Connect(db='db_webqtl',host='localhost', user='username',passwd=passwd)
+    con = MySQLdb.Connect(db='db_webqtl', host='localhost', user='username', passwd=passwd)
     db = con.cursor()
-    print "You have successfully connected to mysql.\n"
+    print("You have successfully connected to mysql.\n")
 except:
-    print "You entered incorrect password.\n"
+    print("You entered incorrect password.\n")
     sys.exit(0)
 
 time0 = time.time()
@@ -55,22 +55,22 @@ time0 = time.time()
 # generate the gene list of expression data here
 #
 #########################################################################
-print 'Checking if each line have same number of members'
+print('Checking if each line have same number of members')
 
 GeneList = []
 isCont = 1
 header = fp.readline()
-header = string.split(string.strip(header),'\t')
-header = map(string.strip, header)
+header = string.split(string.strip(header), '\t')
+header = list(map(string.strip, header))
 nfield = len(header)
 line = fp.readline()
 
 kj=0
 while line:
-    line2 = string.split(string.strip(line),'\t')
-    line2 = map(string.strip, line2)
+    line2 = string.split(string.strip(line), '\t')
+    line2 = list(map(string.strip, line2))
     if len(line2) != nfield:
-        print "Error : " + line
+        print(("Error : " + line))
         isCont = 0
 
     GeneList.append(line2[0])
@@ -78,30 +78,29 @@ while line:
 
     kj+=1
     if kj%100000 == 0:
-        print 'checked ',kj,' lines'
+        print(('checked ', kj, ' lines'))
 
-GeneList = map(string.lower, GeneList)
-GeneList.sort()
+GeneList = sorted(map(string.lower, GeneList))
 
 if isCont==0:
     sys.exit(0)
 
-print 'used ',time.time()-time0,' seconds'
+print(('used ', time.time()-time0, ' seconds'))
 
 #########################################################################
 #
 # Check if each strain exist in database
 # generate the string id list of expression data here
 #
 #########################################################################
-print 'Checking if each strain exist in database'
+print('Checking if each strain exist in database')
 
 isCont = 1
 fp.seek(0)
 header = fp.readline()
-header = string.split(string.strip(header),'\t')
-header = map(string.strip, header)
-header = map(translateAlias, header)
+header = string.split(string.strip(header), '\t')
+header = list(map(string.strip, header))
+header = list(map(translateAlias, header))
 header = header[dataStart:]
 Ids = []
 for item in header:
@@ -109,26 +108,26 @@ for item in header:
         db.execute('select Id from Strain where Name = "%s"' % item)
         Ids.append(db.fetchall()[0][0])
     except:
-        print item,'does not exist, check the if the strain name is correct'
+        print((item, 'does not exist, check the if the strain name is correct'))
         isCont=0
 
 if isCont==0:
     sys.exit(0)
 
-print 'used ',time.time()-time0,' seconds'
+print(('used ', time.time()-time0, ' seconds'))
 
 ########################################################################
 #
 # Check if each ProbeSet exist in database
 #
 ########################################################################
-print 'Check if each ProbeSet exist in database'
+print('Check if each ProbeSet exist in database')
 
 ##---- find PID is name or target ----##
 line = fp.readline()
 line = fp.readline()
-line2 = string.split(string.strip(line),'\t')
-line2 = map(string.strip, line2)
+line2 = string.split(string.strip(line), '\t')
+line2 = list(map(string.strip, line2))
 PId = line2[0]
 
 db.execute('select Id from ProbeSet where Name="%s" and ChipId=%d' % (PId, GeneChipId) )
@@ -146,11 +145,10 @@ Names = []
 for item in results:
     Names.append(item[0])
 
-print Names
+print(Names)
 
-Names = map(string.lower, Names)
+Names = sorted(map(string.lower, Names))
 
-Names.sort()
 # -- Fixed the lower case problem of ProbeSets affx-mur_b2_at doesn't exist --#
 
 ##---- compare genelist with names ----##
@@ -170,7 +168,7 @@ while x<len(GeneList) and y<len(Names):
         y += 1
 
     if x%100000==0:
-        print 'check Name, checked %d lines'%x
+        print(('check Name, checked %d lines'%x))
 
 while x<len(GeneList):
     GeneList2.append(GeneList[x])
@@ -180,20 +178,20 @@ isCont=1
 ferror = open("ProbeSetError.txt", "wb")
 for item in GeneList2:
     ferror.write(item + " doesn't exist \n")
-    print item, " doesn't exist, check if the ProbeSet name is correct"
+    print((item, " doesn't exist, check if the ProbeSet name is correct"))
     isCont = 0
 
 if isCont==0:
     sys.exit(0)
 
-print 'used ',time.time()-time0,' seconds'
+print(('used ', time.time()-time0, ' seconds'))
 
 #########################################################################
 #
 # Insert data into database
 #
 #########################################################################
-print 'getting ProbeSet/Id'
+print('getting ProbeSet/Id')
 
 #---- get Name/Id map ----#
 
@@ -202,16 +200,16 @@ results = db.fetchall()
 NameIds = {}
 for item in results:
     NameIds[item[0]] = item[1]
-print 'used ',time.time()-time0,' seconds'
+print(('used ', time.time()-time0, ' seconds'))
 
-print 'inserting data'
+print('inserting data')
 
 ##---- get old max dataId ----##
 db.execute('select max(Id) from ProbeSetData')
 maxDataId = int(db.fetchall()[0][0])
 bmax = maxDataId
 
-print "old_max = %d\n" % bmax
+print(("old_max = %d\n" % bmax))
 
 ##---- insert data ----##
 fp.seek(0)
@@ -222,8 +220,8 @@ kj = 0
 values1 = []
 values2 = []
 while line:
-    line2 = string.split(string.strip(line),'\t')
-    line2 = map(string.strip, line2)
+    line2 = string.split(string.strip(line), '\t')
+    line2 = list(map(string.strip, line2))
     PId = line2[0]
     recordId = NameIds[PId]
 
@@ -255,8 +253,8 @@ while line:
         values1=[]
         values2=[]
 
-        print 'Inserted ', kj,' lines'
-        print 'used ',time.time()-time0,' seconds'
+        print(('Inserted ', kj, ' lines'))
+        print(('used ', time.time()-time0, ' seconds'))
 
     line = fp.readline()
 