about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/addRif.py 131
1 file changed, 67 insertions, 64 deletions
diff --git a/scripts/addRif.py b/scripts/addRif.py
index 8ea5f74..4d33af3 100755
--- a/scripts/addRif.py
+++ b/scripts/addRif.py
@@ -26,70 +26,69 @@
# created by Lei Yan 02/08/2011
-import string
-import MySQLdb
-import time
import os
import sys
+import MySQLdb
path1 = os.path.abspath(os.path.dirname(__file__))
path2 = path1 + "/.."
path3 = path1 + "/../../tmp"
sys.path.insert(0, path2)
+
def fetchrif():
- try:
- con = MySQLdb.Connect(db="gn3", host="localhost", user="gn2", passwd="password")
- cursor = con.cursor()
- print("You have successfully connected to mysql.\n")
- except:
- print("You entered incorrect password.\n")
- sys.exit(0)
+ try:
+ con = MySQLdb.Connect(db="gn3", host="localhost", user="gn2", passwd="password")
+ cursor = con.cursor()
+ print("You have successfully connected to mysql.\n")
+ except:
+ print("You entered incorrect password.\n")
+ sys.exit(0)
- taxIds = {'10090':1, '9606':4, '10116':2, '3702':3}
- taxIdKeys = taxIds.keys()
+ taxIds = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}
+ taxIdKeys = taxIds.keys()
- os.chdir(path3)
- print("path3: %s" % (path3))
- genedict = {}
+ os.chdir(path3)
+ print("path3: %s" % (path3))
+ genedict = {}
- os.system("rm -vf gene_info")
- os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz")
- os.system("gunzip gene_info.gz")
+ os.system("rm -vf gene_info")
+ os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz")
+ os.system("gunzip gene_info.gz")
- file = open("gene_info", 'r')
- i = 0
- for line1 in file:
- line1 = line1.strip()
- if line1.startswith('#'):
- continue
- line2 = line1.strip().split("\t")
- if line2[0] in taxIdKeys:
- genedict[line2[1]] = line2[2]
- i += 1
- if i%1000000 == 0:
- print("finished: %d" % (i))
- print("finished all: %d" % (i))
- file.close()
+ file = open("gene_info", "r")
+ i = 0
+ for line1 in file:
+ line1 = line1.strip()
+ if line1.startswith("#"):
+ continue
+ line2 = line1.strip().split("\t")
+ if line2[0] in taxIdKeys:
+ genedict[line2[1]] = line2[2]
+ i += 1
+ if i % 1000000 == 0:
+ print("finished: %d" % (i))
+ print("finished all: %d" % (i))
+ file.close()
- os.system("rm -vf generifs_basic")
- os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz")
- os.system("gunzip generifs_basic.gz")
+ os.system("rm -vf generifs_basic")
+ os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz")
+ os.system("gunzip generifs_basic.gz")
- file = open("generifs_basic", 'r')
- i = 0
- for line1 in file:
- line1 = line1.strip()
- if line1.startswith('#'):
- continue
- line2 = line1.strip().split("\t")
- if line2[0] in taxIdKeys and len(line2) >= 5:
- line2[0] = taxIds[line2[0]]
- try:
- symbol = genedict[line2[1]]
- except:
- symbol = ""
- sql = """
+ file = open("generifs_basic", "r")
+ i = 0
+ for line1 in file:
+ line1 = line1.strip()
+ if line1.startswith("#"):
+ continue
+ line2 = line1.strip().split("\t")
+ if line2[0] in taxIdKeys and len(line2) >= 5:
+ line2[0] = taxIds[line2[0]]
+ try:
+ symbol = genedict[line2[1]]
+ except:
+ symbol = ""
+ sql = """
SELECT COUNT(*)
FROM GeneRIF_BASIC
WHERE GeneRIF_BASIC.`SpeciesId`=%s
@@ -98,11 +97,11 @@ def fetchrif():
AND GeneRIF_BASIC.`createtime`=%s
AND GeneRIF_BASIC.`comment`=%s
"""
- cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4]))
- c = cursor.fetchone()[0]
- if c == 0:
- print("to insert...")
- sql = """
+ cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4]))
+ c = cursor.fetchone()[0]
+ if c == 0:
+ print("to insert...")
+ sql = """
INSERT INTO GeneRIF_BASIC
SET GeneRIF_BASIC.`SpeciesId`=%s,
GeneRIF_BASIC.`GeneId`=%s,
@@ -111,16 +110,20 @@ def fetchrif():
GeneRIF_BASIC.`createtime`=%s,
GeneRIF_BASIC.`comment`=%s
"""
- cursor.execute(sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4]))
- i += 1
- if i%100000 == 0:
- print("finished: %d" % (i))
- print("finished all: %d" % (i))
- file.close()
- cursor.close()
+ cursor.execute(
+ sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4])
+ )
+ i += 1
+ if i % 100000 == 0:
+ print("finished: %d" % (i))
+ print("finished all: %d" % (i))
+ file.close()
+ cursor.close()
+
+
# /usr/bin/python addRif.py
if __name__ == "__main__":
- print("command line arguments:\n\t%s" % sys.argv)
- fetchrif()
- print("exit successfully")
+ print("command line arguments:\n\t%s" % sys.argv)
+ fetchrif()
+ print("exit successfully")