diff options
author | Lei Yan | 2014-02-28 00:00:42 +0000 |
---|---|---|
committer | Lei Yan | 2014-02-28 00:00:42 +0000 |
commit | 07dceea5c550891c00b33f4d665f1de2ec936fea (patch) | |
tree | e8b54999a9e9dbf67d6e8e3f0d934aaccdac96f6 /wqflask/maintenance/dataset | |
parent | 01a6c3c6c9769f1ab8c30de77441502d403b04b3 (diff) | |
download | genenetwork2-07dceea5c550891c00b33f4d665f1de2ec936fea.tar.gz |
Made some changes to Lei's IO code for GN1 and GN2 genofiles
Diffstat (limited to 'wqflask/maintenance/dataset')
-rw-r--r-- | wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno | 2 | ||||
-rw-r--r-- | wqflask/maintenance/dataset/load_genotypes.py | 221 | ||||
-rw-r--r-- | wqflask/maintenance/dataset/utilities.py | 4 |
3 files changed, 133 insertions, 94 deletions
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno index 0024ffd1..a28d31fc 100644 --- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno +++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno @@ -9,4 +9,4 @@ Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8 2 rs6365999 0.3 4.811062 B B D D D 3 rs6376963 0.895 5.008089 B B D D D 4 rs3677817 1.185 5.176058 B B D D D -5 rs8236463 2.081 5.579193 B B D D D +5 rstest8236463 2.081 5.579193 B B D D D diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py index 31aaf1aa..fbf6484f 100644 --- a/wqflask/maintenance/dataset/load_genotypes.py +++ b/wqflask/maintenance/dataset/load_genotypes.py @@ -1,31 +1,48 @@ +#Do whatever else is needed with the Marker object +#Probably create Genofile object as well +#Make sure rest of code works with params object (though +#everything in the params object should probably just be the parameters of +#the Genofile object) + + +from __future__ import absolute_import, print_function, division + import sys import re +import argparse import utilities import datastructure -def main(argv): +def main(): + parser = argparse.ArgumentParser(description='Load Genotypes') + parser.add_argument('-c', '--config') + opts = parser.parse_args() + config = opts.config # config - config = utilities.get_config(argv[1]) - print "config:" + config = utilities.get_config(config) + print("config:") for item in config.items('config'): - print "\t%s" % (str(item)) + print("\t", str(item)) + parse_genofile(fetch_parameters(config)) + +def fetch_parameters(config): # variables - inbredsetid = config.get('config', 'inbredsetid') - print "inbredsetid: %s" % inbredsetid - species = datastructure.get_species(inbredsetid) - speciesid = species[0] - print "speciesid: %s" % speciesid - genofreeze = datastructure.get_genofreeze_byinbredsetid(inbredsetid) - genofreezeid = genofreeze[0] - print "genofreezeid: %s" % genofreezeid - dataid = datastructure.get_nextdataid_genotype() - print "next data id: %s" % dataid - cursor, con = utilities.get_cursor() + params = {} + params['inbredsetid'] = config.get('config', 'inbredsetid') + species = datastructure.get_species(params['inbredsetid']) + params["speciesid"] = species[0] + genofreeze = datastructure.get_genofreeze_byinbredsetid(params['inbredsetid']) + params['genofreezeid'] = genofreeze[0] + params['dataid'] = datastructure.get_nextdataid_genotype() + params['genofile'] = config.get('config', 'genofile') + return params + +def parse_genofile(params): # genofile - genofile = open(config.get('config', 'genofile'), 'r') + genofile = open(params['genofile'], 'r') metadic = {} - print + print() # parse genofile for line in genofile: line = line.strip() @@ -42,90 +59,112 @@ def main(argv): continue if line.lower().startswith("chr"): # - print "geno file meta:" + print("geno file meta:") for k, v in metadic.items(): - print "\t%s: %s" % (k, v) + print("\t{}: {}".format(k, v)) # - print "geno file head:\n\t%s" % line - print + print("geno file head:\n\t{}\n".format(line)) strainnames = line.split()[4:] strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes") continue # geno file line - cells = line.split() - chr = cells[0] - locus = cells[1] - cm = cells[2] - mb = cells[3] - values = cells[4:] - # geno + marker = Marker(line) + # + genoid = check_or_insert_geno(params, marker) + if check_genoxref(params): + continue + insert_genodata(params) + insert_genoxref(params) + dataid += 1 + genofile.close() + + +class Marker(object): + def __init__(self, line): + self.cells = line.split() + self.chromosome = cells[0] + self.locus = cells[1] + self.cm = cells[2] + self.mb = cells[3] + self.values = cells[4:] + +def check_or_insert_geno(params, marker): + cursor, con = utilities.get_cursor() + sql = """ + SELECT Geno.`Id` + FROM Geno + WHERE Geno.`SpeciesId`=%s + AND Geno.`Name` like %s + """ + cursor.execute(sql, (speciesid, locus)) + result = cursor.fetchone() + if result: + genoid = result[0] + print("get geno record: %d" % genoid) + else: sql = """ - SELECT Geno.`Id` - FROM Geno - WHERE Geno.`SpeciesId`=%s - AND Geno.`Name` like %s + INSERT INTO Geno + SET + Geno.`SpeciesId`=%s, + Geno.`Name`=%s, + Geno.`Marker_Name`=%s, + Geno.`Chr`=%s, + Geno.`Mb`=%s """ - cursor.execute(sql, (speciesid, locus)) - result = cursor.fetchone() - if result: - genoid = result[0] - print "get geno record: %d" % genoid - else: - sql = """ - INSERT INTO Geno - SET - Geno.`SpeciesId`=%s, - Geno.`Name`=%s, - Geno.`Marker_Name`=%s, - Geno.`Chr`=%s, - Geno.`Mb`=%s - """ - cursor.execute(sql, (speciesid, locus, locus, chr, mb)) - rowcount = cursor.rowcount - genoid = con.insert_id() - print "INSERT INTO Geno: %d record: %d" % (rowcount, genoid) - # genodata - for index, strain in enumerate(strains): - strainid = strain[0] - value = utilities.to_db_string(values[index], None) - if not value: - continue - value = config.get('config', "genovalue_" + value) - try: - number = int(value) - except: - continue - if not number in [-1, 0, 1]: - continue - sql = """ - INSERT INTO GenoData - SET - GenoData.`Id`=%s, - GenoData.`StrainId`=%s, - GenoData.`value`=%s - """ - cursor.execute(sql, (dataid, strainid, number)) - # genoxref + cursor.execute(sql, (speciesid, locus, locus, chr, mb)) + rowcount = cursor.rowcount + genoid = con.insert_id() + print("INSERT INTO Geno: %d record: %d" % (rowcount, genoid)) + return genoid + +def check_GenoXRef(): + sql = """ + select GenoXRef.* + from GenoXRef + where GenoXRef.`GenoFreezeId`=%s + AND GenoXRef.`GenoId`=%s + """ + cursor.execute(sql, (genofreezeid, genoid)) + rowcount = cursor.rowcount + return rowcount + +def insert_genodata(): + for index, strain in enumerate(strains): + strainid = strain[0] + value = utilities.to_db_string(values[index], None) + if not value: + continue + value = config.get('config', "genovalue_" + value) + try: + number = int(value) + except: + continue + if not number in [-1, 0, 1]: + continue sql = """ - INSERT INTO GenoXRef + INSERT INTO GenoData SET - GenoXRef.`GenoFreezeId`=%s, - GenoXRef.`GenoId`=%s, - GenoXRef.`DataId`=%s, - GenoXRef.`cM`=%s, - GenoXRef.`Used_for_mapping`=%s + GenoData.`Id`=%s, + GenoData.`StrainId`=%s, + GenoData.`value`=%s """ - cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N')) - rowcount = cursor.rowcount - print "INSERT INTO GenoXRef: %d record" % (rowcount) - # for loop next - dataid += 1 - print - # release - genofile.close() - con.close() + cursor.execute(sql, (dataid, strainid, number)) + +def insert_genoxref(): + sql = """ + INSERT INTO GenoXRef + SET + GenoXRef.`GenoFreezeId`=%s, + GenoXRef.`GenoId`=%s, + GenoXRef.`DataId`=%s, + GenoXRef.`cM`=%s, + GenoXRef.`Used_for_mapping`=%s + """ + cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N')) + rowcount = cursor.rowcount + print("INSERT INTO GenoXRef: %d record" % (rowcount)) if __name__ == "__main__": - print "command line arguments:\n\t%s" % sys.argv - main(sys.argv) - print "exit successfully" + print("command line arguments:\n\t%s" % sys.argv) + main() + print("exit successfully") diff --git a/wqflask/maintenance/dataset/utilities.py b/wqflask/maintenance/dataset/utilities.py index d389e672..787c9481 100644 --- a/wqflask/maintenance/dataset/utilities.py +++ b/wqflask/maintenance/dataset/utilities.py @@ -4,8 +4,8 @@ import ConfigParser def get_cursor(): host = 'localhost' - user = 'webqtl' - passwd = 'webqtl' + user = 'gn2' + passwd = 'UhHJuiS6gC8hj4a' db = 'db_webqtl' con = MySQLdb.Connect(db=db, host=host, user=user, passwd=passwd) cursor = con.cursor() |