From 3bdda52511277ee8d9c09ea6332512256f408ad7 Mon Sep 17 00:00:00 2001 From: John Nduli Date: Fri, 12 Jul 2024 15:04:03 +0300 Subject: chore: fix pylint errors --- scripts/update_rif_table.py | 125 +++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 61 deletions(-) (limited to 'scripts/update_rif_table.py') diff --git a/scripts/update_rif_table.py b/scripts/update_rif_table.py index 84d1a97..8566d60 100755 --- a/scripts/update_rif_table.py +++ b/scripts/update_rif_table.py @@ -24,6 +24,10 @@ # Updated on Lei Yan 2011/02/08 # created by Lei Yan 02/08/2011 +""" +Script responsible for updating the GeneRIF_BASIC table +""" + import os import sys import MySQLdb @@ -35,6 +39,7 @@ sys.path.insert(0, path2) def fetchrif(): + """ TODO: break this down into modules """ try: con = MySQLdb.Connect(db="gn3", host="localhost", user="gn2", passwd="password") cursor = con.cursor() @@ -43,85 +48,83 @@ def fetchrif(): print("You entered incorrect password.\n") sys.exit(0) - taxIds = {"10090": 1, "9606": 4, "10116": 2, "3702": 3} - taxIdKeys = taxIds.keys() + tax_ids = {"10090": 1, "9606": 4, "10116": 2, "3702": 3} + tax_id_keys = tax_ids.keys() os.chdir(path3) - print("path3: %s" % (path3)) + print(f"path3: {path3}") genedict = {} os.system("rm -vf gene_info") os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz") os.system("gunzip gene_info.gz") - file = open("gene_info", "r") - i = 0 - for line1 in file: - line1 = line1.strip() - if line1.startswith("#"): - continue - line2 = line1.strip().split("\t") - if line2[0] in taxIdKeys: - genedict[line2[1]] = line2[2] - i += 1 - if i % 1000000 == 0: - print("finished: %d" % (i)) - print("finished all: %d" % (i)) - file.close() + with open("gene_info", "r") as file: + i = 0 + for line1 in file: + line1 = line1.strip() + if line1.startswith("#"): + continue + line2 = line1.strip().split("\t") + if line2[0] in tax_id_keys: + genedict[line2[1]] = line2[2] + i += 1 + if i % 1000000 == 0: + 
print(f"finished: {i}") + print(f"finished all: {i}") os.system("rm -vf generifs_basic") os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz") os.system("gunzip generifs_basic.gz") - file = open("generifs_basic", "r") - i = 0 - for line1 in file: - line1 = line1.strip() - if line1.startswith("#"): - continue - line2 = line1.strip().split("\t") - if line2[0] in taxIdKeys and len(line2) >= 5: - line2[0] = taxIds[line2[0]] - try: - symbol = genedict[line2[1]] - except: - symbol = "" - sql = """ - SELECT COUNT(*) - FROM GeneRIF_BASIC - WHERE GeneRIF_BASIC.`SpeciesId`=%s - AND GeneRIF_BASIC.`GeneId`=%s - AND GeneRIF_BASIC.`PubMed_ID`=%s - AND GeneRIF_BASIC.`createtime`=%s - AND GeneRIF_BASIC.`comment`=%s - """ - cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4])) - c = cursor.fetchone()[0] - if c == 0: - print("to insert...") + with open("generifs_basic", "r") as file: + i = 0 + for line1 in file: + line1 = line1.strip() + if line1.startswith("#"): + continue + line2 = line1.strip().split("\t") + if line2[0] in tax_id_keys and len(line2) >= 5: + line2[0] = tax_ids[line2[0]] + try: + symbol = genedict[line2[1]] + except: + symbol = "" sql = """ - INSERT INTO GeneRIF_BASIC - SET GeneRIF_BASIC.`SpeciesId`=%s, - GeneRIF_BASIC.`GeneId`=%s, - GeneRIF_BASIC.`symbol`=%s, - GeneRIF_BASIC.`PubMed_ID`=%s, - GeneRIF_BASIC.`createtime`=%s, - GeneRIF_BASIC.`comment`=%s - """ - cursor.execute( - sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4]) - ) - i += 1 - if i % 100000 == 0: - print("finished: %d" % (i)) - print("finished all: %d" % (i)) - file.close() - cursor.close() + SELECT COUNT(*) + FROM GeneRIF_BASIC + WHERE GeneRIF_BASIC.`SpeciesId`=%s + AND GeneRIF_BASIC.`GeneId`=%s + AND GeneRIF_BASIC.`PubMed_ID`=%s + AND GeneRIF_BASIC.`createtime`=%s + AND GeneRIF_BASIC.`comment`=%s + """ + cursor.execute(sql, (line2[0], line2[1], line2[2], line2[3], line2[4])) + count = cursor.fetchone()[0] + if count == 0: + print("to insert...") 
+ sql = """ + INSERT INTO GeneRIF_BASIC + SET GeneRIF_BASIC.`SpeciesId`=%s, + GeneRIF_BASIC.`GeneId`=%s, + GeneRIF_BASIC.`symbol`=%s, + GeneRIF_BASIC.`PubMed_ID`=%s, + GeneRIF_BASIC.`createtime`=%s, + GeneRIF_BASIC.`comment`=%s + """ + cursor.execute( + sql, (line2[0], line2[1], symbol, line2[2], line2[3], line2[4]) + ) + i += 1 + if i % 100000 == 0: + print(f"finished: {i}") + print(f"finished all: {i}") + cursor.close() # /usr/bin/python addRif.py if __name__ == "__main__": - print("command line arguments:\n\t%s" % sys.argv) + print(f"command line arguments:\n\t{sys.argv}") fetchrif() print("exit successfully") -- cgit v1.2.3