1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by Lei Yan 2011/02/08
# created by Lei Yan 02/08/2011
import os
import sys
import MySQLdb
# Absolute directory containing this script; the paths below are built from it.
_script_dir = os.path.dirname(__file__)
path1 = os.path.abspath(_script_dir)
# Parent directory of the script directory (left un-normalized, as "<dir>/..").
path2 = "/".join((path1, ".."))
# Scratch directory two levels up, used as the download/working directory.
path3 = "/".join((path1, "..", "..", "tmp"))
# Put the parent directory at the front of the import search path.
sys.path[0:0] = [path2]
def _download_ncbi_file(url, name):
    """Download the gzip archive at ``url`` into the cwd and unpack it as ``name``."""
    # Remove the unpacked file AND any leftover archive: if a stale
    # "<name>.gz" survives an interrupted run, wget stores the new download
    # under a ".1" suffix and gunzip would then unpack the stale archive.
    os.system("rm -vf %s %s.gz" % (name, name))
    os.system("wget %s" % url)
    os.system("gunzip %s.gz" % name)


def _load_gene_symbols(path, tax_ids):
    """Return a dict mapping column 2 to column 3 of ``path`` for wanted rows.

    Only tab-separated rows whose first column is a key of ``tax_ids`` are
    kept.  Lines starting with "#" are skipped and not counted.
    """
    symbols = {}
    count = 0
    with open(path, "r") as handle:
        for raw in handle:
            line = raw.strip()
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            # Length guard: a truncated row must not abort the whole import.
            if fields[0] in tax_ids and len(fields) >= 3:
                symbols[fields[1]] = fields[2]
            count += 1
            if count % 1000000 == 0:
                print("finished: %d" % (count))
    print("finished all: %d" % (count))
    return symbols


def _insert_new_rifs(cursor, path, tax_ids, symbols):
    """Insert every row of ``path`` that is not yet present in GeneRIF_BASIC.

    A row is considered present when species, gene id, PubMed id, create time
    and comment all match an existing record.  ``symbols`` supplies the value
    for the ``symbol`` column; genes missing from it get an empty string.
    """
    count_sql = """
        SELECT COUNT(*)
        FROM GeneRIF_BASIC
        WHERE GeneRIF_BASIC.`SpeciesId`=%s
        AND GeneRIF_BASIC.`GeneId`=%s
        AND GeneRIF_BASIC.`PubMed_ID`=%s
        AND GeneRIF_BASIC.`createtime`=%s
        AND GeneRIF_BASIC.`comment`=%s
        """
    insert_sql = """
        INSERT INTO GeneRIF_BASIC
        SET GeneRIF_BASIC.`SpeciesId`=%s,
        GeneRIF_BASIC.`GeneId`=%s,
        GeneRIF_BASIC.`symbol`=%s,
        GeneRIF_BASIC.`PubMed_ID`=%s,
        GeneRIF_BASIC.`createtime`=%s,
        GeneRIF_BASIC.`comment`=%s
        """
    count = 0
    with open(path, "r") as handle:
        for raw in handle:
            line = raw.strip()
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            if fields[0] in tax_ids and len(fields) >= 5:
                species_id = tax_ids[fields[0]]
                gene_id, pubmed_id, create_time, comment = fields[1:5]
                symbol = symbols.get(gene_id, "")
                cursor.execute(
                    count_sql,
                    (species_id, gene_id, pubmed_id, create_time, comment),
                )
                if cursor.fetchone()[0] == 0:
                    print("to insert...")
                    cursor.execute(
                        insert_sql,
                        (species_id, gene_id, symbol, pubmed_id, create_time, comment),
                    )
            count += 1
            if count % 100000 == 0:
                print("finished: %d" % (count))
    print("finished all: %d" % (count))


def fetchrif():
    """Download NCBI gene_info and generifs_basic and load new GeneRIFs into MySQL.

    Steps:
      1. Connect to the ``gn3`` database on localhost.
      2. Change into the module-level ``path3`` directory and download/unpack
         ``gene_info``; build a gene-id -> symbol lookup for the wanted species.
      3. Download/unpack ``generifs_basic`` and insert each row that is not
         already in ``GeneRIF_BASIC``.
      4. Commit, then close the cursor and the connection.

    Side effects: changes the process working directory, runs ``rm``,
    ``wget`` and ``gunzip`` through the shell, and writes to the database.

    Raises:
        SystemExit: with status 1 when the database connection fails.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    try:
        con = MySQLdb.Connect(db="gn3", host="localhost", user="gn2", passwd="password")
    except MySQLdb.Error as err:
        # Report the real cause and exit non-zero so callers can detect failure.
        print("Could not connect to mysql: %s\n" % (err,))
        sys.exit(1)
    print("You have successfully connected to mysql.\n")

    # Taxonomy id (first column of the NCBI files) -> value stored in SpeciesId.
    tax_ids = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}

    try:
        cursor = con.cursor()
        try:
            os.chdir(path3)
            print("path3: %s" % (path3))

            _download_ncbi_file(
                "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz", "gene_info"
            )
            symbols = _load_gene_symbols("gene_info", tax_ids)

            _download_ncbi_file(
                "ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz",
                "generifs_basic",
            )
            _insert_new_rifs(cursor, "generifs_basic", tax_ids, symbols)

            # DB-API connections do not autocommit by default; without this the
            # inserts are discarded on transactional storage engines.
            con.commit()
        finally:
            cursor.close()
    finally:
        con.close()
# Usage: /usr/bin/python addRif.py
# Script entry point: echo the invocation, run the import, report completion.
if __name__ == "__main__":
    argv_report = "command line arguments:\n\t%s" % sys.argv
    print(argv_report)
    fetchrif()
    print("exit successfully")
|