about summary refs log tree commit diff
diff options
context:
space:
mode:
author John Nduli 2024-07-12 15:04:03 +0300
committer BonfaceKilz 2024-07-23 10:18:32 +0300
commit 3bdda52511277ee8d9c09ea6332512256f408ad7 (patch)
tree 5301ff52793d93c2aaef3bbee18682f3d1f1fc06
parent bcce1a5ee31973a573f91c3113d7ae518bb1e612 (diff)
download genenetwork3-3bdda52511277ee8d9c09ea6332512256f408ad7.tar.gz
chore: fix pylint errors
-rwxr-xr-x scripts/update_rif_table.py 125
1 files changed, 64 insertions, 61 deletions
diff --git a/scripts/update_rif_table.py b/scripts/update_rif_table.py
index 84d1a97..8566d60 100755
--- a/scripts/update_rif_table.py
+++ b/scripts/update_rif_table.py
@@ -24,6 +24,10 @@
 # Updated on Lei Yan 2011/02/08
 # created by Lei Yan 02/08/2011
 
+"""
+Script responsible for updating the GeneRIF_BASIC table
+"""
+
 import os
 import sys
 import MySQLdb
@@ -35,6 +39,7 @@ sys.path.insert(0, path2)
 
 
 def fetchrif():
+    """ TODO: break this down into modules """
     try:
         con = MySQLdb.Connect(db="gn3", host="localhost", user="gn2", passwd="password")
         cursor = con.cursor()
@@ -43,85 +48,83 @@ def fetchrif():
         print("You entered incorrect password.\n")
         sys.exit(0)
 
-    taxIds = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}
-    taxIdKeys = taxIds.keys()
+    tax_ids = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}
+    tax_id_keys = tax_ids.keys()
 
     os.chdir(path3)
-    print("path3: %s" % (path3))
+    print(f"path3: {path3}")
     genedict = {}
 
     os.system("rm -vf gene_info")
     os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz")
     os.system("gunzip gene_info.gz")
 
-    file = open("gene_info", "r")
-    i = 0
-    for line1 in file:
-        line1 = line1.strip()
-        if line1.startswith("#"):
-            continue
-        line2 = line1.strip().split("\t")
-        if line2[0] in taxIdKeys:
-            genedict[line2[1]] = line2[2]
-        i += 1
-        if i % 1000000 == 0:
-            print("finished: %d" % (i))
-    print("finished all: %d" % (i))
-    file.close()
+    with open("gene_info", "r") as file:
+        i = 0
+        for line1 in file:
+            line1 = line1.strip()
+            if line1.startswith("#"):
+                continue
+            line2 = line1.strip().split("\t")
+            if line2[0] in tax_id_keys:
+                genedict[line2[1]] = line2[2]
+            i += 1
+            if i % 1000000 == 0:
+                print(f"finished: {i}")
+        print(f"finished all: {i}")
 
     os.system("rm -vf generifs_basic")
     os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz")
     os.system("gunzip generifs_basic.gz")
 
-    file = open("generifs_basic", "r")
-    i = 0
-    for line1 in file:
-        line1 = line1.strip()
-        if line1.startswith("#"):
-            continue
-        line2 = line1.strip().split("\t")
-        if line2[0] in taxIdKeys and len(line2) >= 5:
-            line2[0] = taxIds[line2[0]]
-            try:
-                symbol = genedict[line2[1]]
-            except:
-                symbol = ""
-            sql = """
-				SELECT COUNT(*)
-				FROM GeneRIF_BASIC
-				WHERE GeneRIF_BASIC.`SpeciesId`=%s
-				AND GeneRIF_BASIC.`GeneId`=%s
-				AND GeneRIF_BASIC.`PubMed_ID`=%s
-				AND GeneRIF_BASIC.`createtime`=%s
-				AND GeneRIF_BASIC.`comment`=%s
-				"""
-            cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4]))
-            c = cursor.fetchone()[0]
-            if c == 0:
-                print("to insert...")
+    with open("generifs_basic", "r") as file:
+        i = 0
+        for line1 in file:
+            line1 = line1.strip()
+            if line1.startswith("#"):
+                continue
+            line2 = line1.strip().split("\t")
+            if line2[0] in tax_id_keys and len(line2) >= 5:
+                line2[0] = tax_ids[line2[0]]
+                try:
+                    symbol = genedict[line2[1]]
+                except:
+                    symbol = ""
                 sql = """
-					INSERT INTO GeneRIF_BASIC
-					SET GeneRIF_BASIC.`SpeciesId`=%s,
-						GeneRIF_BASIC.`GeneId`=%s,
-						GeneRIF_BASIC.`symbol`=%s,
-						GeneRIF_BASIC.`PubMed_ID`=%s,
-						GeneRIF_BASIC.`createtime`=%s,
-						GeneRIF_BASIC.`comment`=%s
-					"""
-                cursor.execute(
-                    sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4])
-                )
-        i += 1
-        if i % 100000 == 0:
-            print("finished: %d" % (i))
-    print("finished all: %d" % (i))
-    file.close()
-    cursor.close()
+                                    SELECT COUNT(*)
+                                    FROM GeneRIF_BASIC
+                                    WHERE GeneRIF_BASIC.`SpeciesId`=%s
+                                    AND GeneRIF_BASIC.`GeneId`=%s
+                                    AND GeneRIF_BASIC.`PubMed_ID`=%s
+                                    AND GeneRIF_BASIC.`createtime`=%s
+                                    AND GeneRIF_BASIC.`comment`=%s
+                                    """
+                cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4]))
+                count = cursor.fetchone()[0]
+                if count == 0:
+                    print("to insert...")
+                    sql = """
+                                            INSERT INTO GeneRIF_BASIC
+                                            SET GeneRIF_BASIC.`SpeciesId`=%s,
+                                                    GeneRIF_BASIC.`GeneId`=%s,
+                                                    GeneRIF_BASIC.`symbol`=%s,
+                                                    GeneRIF_BASIC.`PubMed_ID`=%s,
+                                                    GeneRIF_BASIC.`createtime`=%s,
+                                                    GeneRIF_BASIC.`comment`=%s
+                                            """
+                    cursor.execute(
+                        sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4])
+                    )
+            i += 1
+            if i % 100000 == 0:
+                print(f"finished: {i}")
+        print(f"finished all: {i}")
+        cursor.close()
 
 
 # /usr/bin/python addRif.py
 
 if __name__ == "__main__":
-    print("command line arguments:\n\t%s" % sys.argv)
+    print(f"command line arguments:\n\t{sys.argv}")
     fetchrif()
     print("exit successfully")