# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by Lei Yan 2011/02/08

# created by Lei Yan 02/08/2011

"""Reload the GeneRIF_BASIC table from NCBI's gene_info and generifs_basic files.

Steps, in order:

1. Connect to the MySQL database described by ``base.webqtlConfig``.
2. Change into ``../../tmp`` (relative to this file) and download and unpack
   ``gene_info.gz``; build ``cdict``, a map from the second column to the
   third column (stored below as GeneId and Symbol) for every row whose first
   column is a key of ``taxIds``.
3. Download and unpack ``generifs_basic.gz``, delete every row of
   GeneRIF_BASIC, and insert one row per well-formed (five-field) line whose
   first column is a key of ``taxIds``.
4. Commit and close the connection.

The script exits with status 1 if the database connection fails or if either
unpacked file cannot be opened.
"""

import string
import MySQLdb
import time
import os
import sys

path1 = os.path.abspath(os.path.dirname(__file__))
path2 = path1 + "/.."
path3 = path1 + "/../../tmp"
sys.path.insert(0, path2)
from base import webqtlConfig

# NOTE: every print below takes a single parenthesised argument so that the
# output is byte-identical whether it is parsed as a Python 2 print statement
# or as a print() call.


def _split_fields(line):
    """Return the tab-separated fields of *line*, each stripped of whitespace.

    The whole line is stripped first, so trailing empty fields are dropped
    (this matches how the file has always been parsed).
    """
    return [field.strip() for field in line.strip().split("\t")]


try:
    con = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
                          host=webqtlConfig.MYSQL_SERVER,
                          user=webqtlConfig.DB_USER,
                          passwd=webqtlConfig.DB_PASSWD)
    cursor = con.cursor()
except MySQLdb.Error as err:
    # Only database errors are handled here; the real cause is reported
    # because a wrong password is just one of several ways to fail.
    print("You entered incorrect password.\n")
    print("MySQL error: %s" % (err,))
    sys.exit(1)
print("You have successfully connected to mysql.\n")

# Keys are compared with the first column of both NCBI files; values are what
# gets written to GeneRIF_BASIC.SpeciesId.
# NOTE(review): keys look like NCBI taxonomy ids - confirm against NCBI README.
taxIds = {'10090': 1, '9606': 4, '10116': 2, '3702': 3}

os.chdir(path3)

# GeneId -> Symbol, filled from gene_info and used to annotate GeneRIF rows.
cdict = {}

# Remove a stale archive as well as the unpacked file: if gene_info.gz were
# left over from a failed run, wget would save the new download under a
# different name and gunzip would unpack the old archive.
os.system("rm -f gene_info gene_info.gz")
os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz")
os.system("gunzip gene_info.gz")

try:
    fp = open("gene_info")
except IOError:
    print("gene_info doesn't exist")
    sys.exit(1)

with fp:
    for i, line in enumerate(fp, 1):
        fields = _split_fields(line)
        # Rows too short to carry both an id and a symbol are skipped.
        if fields[0] in taxIds and len(fields) >= 3:
            cdict[fields[1]] = fields[2]
        if i % 1000 == 0:
            print("finished  %d" % i)

os.system("rm -f generifs_basic generifs_basic.gz")
os.system("wget ftp://ftp.ncbi.nlm.nih.gov/gene/GeneRIF/generifs_basic.gz")
os.system("gunzip generifs_basic.gz")

try:
    fp = open("generifs_basic")
except IOError:
    print("generifs_basic doesn't exist")
    sys.exit(1)

# The table is emptied only once the replacement data is known to be readable.
cursor.execute("delete from GeneRIF_BASIC")

# Number of lines whose first column matched taxIds (malformed ones included).
count = 0
with fp:
    for line in fp:
        fields = _split_fields(line)
        if fields[0] not in taxIds:
            continue
        count += 1
        if len(fields) != 5:
            # Report the malformed line and leave it out of the table.
            print(line)
            continue
        symbol = cdict.get(fields[1], "")
        row = (taxIds[fields[0]], fields[1], symbol) + tuple(fields[2:])
        cursor.execute(
            "insert into GeneRIF_BASIC(SpeciesId, GeneId, Symbol, PubMed_ID, "
            "createtime, comment) values(%s, %s, %s, %s, %s, %s)",
            row)

print("%d \n" % count)

# DB-API connections do not autocommit by default; without this the delete
# and inserts would be rolled back on transactional storage engines.
con.commit()
cursor.close()
con.close()