aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorJohn Nduli2024-07-12 15:04:03 +0300
committerBonfaceKilz2024-07-23 10:18:32 +0300
commit3bdda52511277ee8d9c09ea6332512256f408ad7 (patch)
tree5301ff52793d93c2aaef3bbee18682f3d1f1fc06 /scripts
parentbcce1a5ee31973a573f91c3113d7ae518bb1e612 (diff)
downloadgenenetwork3-3bdda52511277ee8d9c09ea6332512256f408ad7.tar.gz
chore: fix pylint errors
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/update_rif_table.py125
1 files changed, 64 insertions, 61 deletions
diff --git a/scripts/update_rif_table.py b/scripts/update_rif_table.py
index 84d1a97..8566d60 100755
--- a/scripts/update_rif_table.py
+++ b/scripts/update_rif_table.py
@@ -24,6 +24,10 @@
# Updated by Lei Yan on 2011/02/08
# created by Lei Yan 02/08/2011
+"""
Script responsible for updating the GeneRIF_BASIC table
+"""
+
import os
import sys
import MySQLdb
@@ -35,6 +39,7 @@ sys.path.insert(0, path2)
def fetchrif():
+ """Update GeneRIF_BASIC from NCBI downloads. TODO: break this down into modules."""
try:
con = MySQLdb.Connect(db="gn3", host="localhost", user="gn2", passwd="password")
cursor = con.cursor()
@@ -43,85 +48,83 @@ def fetchrif():
print("You entered incorrect password.\n")
sys.exit(0)
- taxIds = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}
- taxIdKeys = taxIds.keys()
+ tax_ids = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}
+ tax_id_keys = tax_ids.keys()
os.chdir(path3)
- print("path3: %s" % (path3))
+ print(f"path3: {path3}")
genedict = {}
os.system("rm -vf gene_info")
os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz")
os.system("gunzip gene_info.gz")
- file = open("gene_info", "r")
- i = 0
- for line1 in file:
- line1 = line1.strip()
- if line1.startswith("#"):
- continue
- line2 = line1.strip().split("\t")
- if line2[0] in taxIdKeys:
- genedict[line2[1]] = line2[2]
- i += 1
- if i % 1000000 == 0:
- print("finished: %d" % (i))
- print("finished all: %d" % (i))
- file.close()
+ with open("gene_info", "r") as file:
+ i = 0
+ for line1 in file:
+ line1 = line1.strip()
+ if line1.startswith("#"):
+ continue
+ line2 = line1.strip().split("\t")
+ if line2[0] in tax_id_keys:
+ genedict[line2[1]] = line2[2]
+ i += 1
+ if i % 1000000 == 0:
+ print(f"finished: {i}")
+ print(f"finished all: {i}")
os.system("rm -vf generifs_basic")
os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz")
os.system("gunzip generifs_basic.gz")
- file = open("generifs_basic", "r")
- i = 0
- for line1 in file:
- line1 = line1.strip()
- if line1.startswith("#"):
- continue
- line2 = line1.strip().split("\t")
- if line2[0] in taxIdKeys and len(line2) >= 5:
- line2[0] = taxIds[line2[0]]
- try:
- symbol = genedict[line2[1]]
- except:
- symbol = ""
- sql = """
- SELECT COUNT(*)
- FROM GeneRIF_BASIC
- WHERE GeneRIF_BASIC.`SpeciesId`=%s
- AND GeneRIF_BASIC.`GeneId`=%s
- AND GeneRIF_BASIC.`PubMed_ID`=%s
- AND GeneRIF_BASIC.`createtime`=%s
- AND GeneRIF_BASIC.`comment`=%s
- """
- cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4]))
- c = cursor.fetchone()[0]
- if c == 0:
- print("to insert...")
+ with open("generifs_basic", "r") as file:
+ i = 0
+ for line1 in file:
+ line1 = line1.strip()
+ if line1.startswith("#"):
+ continue
+ line2 = line1.strip().split("\t")
+ if line2[0] in tax_id_keys and len(line2) >= 5:
+ line2[0] = tax_ids[line2[0]]
+ try:
+ symbol = genedict[line2[1]]
+ except:
+ symbol = ""
sql = """
- INSERT INTO GeneRIF_BASIC
- SET GeneRIF_BASIC.`SpeciesId`=%s,
- GeneRIF_BASIC.`GeneId`=%s,
- GeneRIF_BASIC.`symbol`=%s,
- GeneRIF_BASIC.`PubMed_ID`=%s,
- GeneRIF_BASIC.`createtime`=%s,
- GeneRIF_BASIC.`comment`=%s
- """
- cursor.execute(
- sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4])
- )
- i += 1
- if i % 100000 == 0:
- print("finished: %d" % (i))
- print("finished all: %d" % (i))
- file.close()
- cursor.close()
+ SELECT COUNT(*)
+ FROM GeneRIF_BASIC
+ WHERE GeneRIF_BASIC.`SpeciesId`=%s
+ AND GeneRIF_BASIC.`GeneId`=%s
+ AND GeneRIF_BASIC.`PubMed_ID`=%s
+ AND GeneRIF_BASIC.`createtime`=%s
+ AND GeneRIF_BASIC.`comment`=%s
+ """
+ cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4]))
+ count = cursor.fetchone()[0]
+ if count == 0:
+ print("to insert...")
+ sql = """
+ INSERT INTO GeneRIF_BASIC
+ SET GeneRIF_BASIC.`SpeciesId`=%s,
+ GeneRIF_BASIC.`GeneId`=%s,
+ GeneRIF_BASIC.`symbol`=%s,
+ GeneRIF_BASIC.`PubMed_ID`=%s,
+ GeneRIF_BASIC.`createtime`=%s,
+ GeneRIF_BASIC.`comment`=%s
+ """
+ cursor.execute(
+ sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4])
+ )
+ i += 1
+ if i % 100000 == 0:
+ print(f"finished: {i}")
+ print(f"finished all: {i}")
+ cursor.close()
# /usr/bin/python addRif.py
if __name__ == "__main__":
- print("command line arguments:\n\t%s" % sys.argv)
+ print(f"command line arguments:\n\t{sys.argv}")
fetchrif()
print("exit successfully")