about summary refs log tree commit diff
path: root/wqflask/maintenance
diff options
context:
space:
mode:
author Lei Yan 2014-02-28 00:00:42 +0000
committer Lei Yan 2014-02-28 00:00:42 +0000
commit 07dceea5c550891c00b33f4d665f1de2ec936fea (patch)
tree e8b54999a9e9dbf67d6e8e3f0d934aaccdac96f6 /wqflask/maintenance
parent 01a6c3c6c9769f1ab8c30de77441502d403b04b3 (diff)
download genenetwork2-07dceea5c550891c00b33f4d665f1de2ec936fea.tar.gz
Made some changes to Lei's IO code for GN1 and GN2 genofiles
Diffstat (limited to 'wqflask/maintenance')
-rw-r--r-- wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno 2
-rw-r--r-- wqflask/maintenance/dataset/load_genotypes.py 221
-rw-r--r-- wqflask/maintenance/dataset/utilities.py 4
l--------- wqflask/maintenance/our_settings.py 2
4 files changed, 134 insertions, 95 deletions
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
index 0024ffd1..a28d31fc 100644
--- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
+++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
@@ -9,4 +9,4 @@ Chr	Locus	cM	Mb	BXD1	BXD2	BXD5	BXD6	BXD8
 2	rs6365999	0.3	4.811062	B	B	D	D	D
 3	rs6376963	0.895	5.008089	B	B	D	D	D
 4	rs3677817	1.185	5.176058	B	B	D	D	D
-5	rs8236463	2.081	5.579193	B	B	D	D	D
+5	rstest8236463	2.081	5.579193	B	B	D	D	D
diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py
index 31aaf1aa..fbf6484f 100644
--- a/wqflask/maintenance/dataset/load_genotypes.py
+++ b/wqflask/maintenance/dataset/load_genotypes.py
@@ -1,31 +1,48 @@
+#Do whatever else is needed with the Marker object
+#Probably create Genofile object as well
+#Make sure rest of code works with params object (though
+#everything in the params object should probably just be the parameters of
+#the Genofile object)
+
+
+from __future__ import absolute_import, print_function, division
+
 import sys
 import re
+import argparse
 
 import utilities
 import datastructure
 
-def main(argv):
+def main():
+    parser = argparse.ArgumentParser(description='Load Genotypes')
+    parser.add_argument('-c', '--config')
+    opts = parser.parse_args()
+    config = opts.config
     # config
-    config = utilities.get_config(argv[1])
-    print "config:"
+    config = utilities.get_config(config)
+    print("config:")
     for item in config.items('config'):
-        print "\t%s" % (str(item))
+        print("\t", str(item))
+    parse_genofile(fetch_parameters(config))
+
+def fetch_parameters(config):
     # variables
-    inbredsetid = config.get('config', 'inbredsetid')
-    print "inbredsetid: %s" % inbredsetid
-    species = datastructure.get_species(inbredsetid)
-    speciesid = species[0]
-    print "speciesid: %s" % speciesid
-    genofreeze = datastructure.get_genofreeze_byinbredsetid(inbredsetid)
-    genofreezeid = genofreeze[0]
-    print "genofreezeid: %s" % genofreezeid
-    dataid = datastructure.get_nextdataid_genotype()
-    print "next data id: %s" % dataid
-    cursor, con = utilities.get_cursor()
+    params = {}
+    params['inbredsetid'] = config.get('config', 'inbredsetid')
+    species = datastructure.get_species(params['inbredsetid'])
+    params["speciesid"] = species[0]
+    genofreeze = datastructure.get_genofreeze_byinbredsetid(params['inbredsetid'])
+    params['genofreezeid'] = genofreeze[0]
+    params['dataid'] = datastructure.get_nextdataid_genotype()
+    params['genofile'] = config.get('config', 'genofile')
+    return params
+    
+def parse_genofile(params):
     # genofile
-    genofile = open(config.get('config', 'genofile'), 'r')
+    genofile = open(params['genofile'], 'r')
     metadic = {}
-    print
+    print()
     # parse genofile
     for line in genofile:
         line = line.strip()
@@ -42,90 +59,112 @@ def main(argv):
             continue
         if line.lower().startswith("chr"):
             #
-            print "geno file meta:"
+            print("geno file meta:")
             for k, v in metadic.items():
-                print "\t%s: %s" % (k, v)
+                print("\t{}: {}".format(k, v))
             #
-            print "geno file head:\n\t%s" % line
-            print
+            print("geno file head:\n\t{}\n".format(line))
             strainnames = line.split()[4:]
             strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes")
             continue
         # geno file line
-        cells = line.split()
-        chr = cells[0]
-        locus = cells[1]
-        cm = cells[2]
-        mb = cells[3]
-        values = cells[4:]
-        # geno
+        marker = Marker(line)
+        #
+        genoid = check_or_insert_geno(params, marker)
+        if check_genoxref(params):
+            continue
+        insert_genodata(params)
+        insert_genoxref(params)
+        dataid += 1
+    genofile.close()
+    
+    
+class Marker(object):
+    def __init__(self, line):
+        self.cells = line.split()
+        self.chromosome = cells[0]
+        self.locus = cells[1]
+        self.cm = cells[2]
+        self.mb = cells[3]
+        self.values = cells[4:]
+        
+def check_or_insert_geno(params, marker):
+    cursor, con = utilities.get_cursor()
+    sql = """
+        SELECT Geno.`Id`
+        FROM Geno
+        WHERE Geno.`SpeciesId`=%s
+        AND Geno.`Name` like %s
+        """
+    cursor.execute(sql, (speciesid, locus))
+    result = cursor.fetchone()
+    if result:
+        genoid = result[0]
+        print("get geno record: %d" % genoid)
+    else:
         sql = """
-            SELECT Geno.`Id`
-            FROM Geno
-            WHERE Geno.`SpeciesId`=%s
-            AND Geno.`Name` like %s
+            INSERT INTO Geno
+            SET
+            Geno.`SpeciesId`=%s,
+            Geno.`Name`=%s,
+            Geno.`Marker_Name`=%s,
+            Geno.`Chr`=%s,
+            Geno.`Mb`=%s
             """
-        cursor.execute(sql, (speciesid, locus))
-        result = cursor.fetchone()
-        if result:
-            genoid = result[0]
-            print "get geno record: %d" % genoid
-        else:
-            sql = """
-                INSERT INTO Geno
-                SET
-                Geno.`SpeciesId`=%s,
-                Geno.`Name`=%s,
-                Geno.`Marker_Name`=%s,
-                Geno.`Chr`=%s,
-                Geno.`Mb`=%s
-                """
-            cursor.execute(sql, (speciesid, locus, locus, chr, mb))
-            rowcount = cursor.rowcount
-            genoid = con.insert_id()
-            print "INSERT INTO Geno: %d record: %d" % (rowcount, genoid)
-        # genodata
-        for index, strain in enumerate(strains):
-            strainid = strain[0]
-            value = utilities.to_db_string(values[index], None)
-            if not value:
-                continue
-            value = config.get('config', "genovalue_" + value)
-            try:
-                number = int(value)
-            except:
-                continue
-            if not number in [-1, 0, 1]:
-                continue
-            sql = """
-                INSERT INTO GenoData
-                SET
-                GenoData.`Id`=%s,
-                GenoData.`StrainId`=%s,
-                GenoData.`value`=%s
-                """
-            cursor.execute(sql, (dataid, strainid, number))
-        # genoxref
+        cursor.execute(sql, (speciesid, locus, locus, chr, mb))
+        rowcount = cursor.rowcount
+        genoid = con.insert_id()
+        print("INSERT INTO Geno: %d record: %d" % (rowcount, genoid))
+    return genoid
+
+def check_GenoXRef():
+    sql = """
+        select GenoXRef.*
+        from GenoXRef
+        where GenoXRef.`GenoFreezeId`=%s
+        AND GenoXRef.`GenoId`=%s
+        """
+    cursor.execute(sql, (genofreezeid, genoid))
+    rowcount = cursor.rowcount
+    return rowcount
+    
+def insert_genodata():
+    for index, strain in enumerate(strains):
+        strainid = strain[0]
+        value = utilities.to_db_string(values[index], None)
+        if not value:
+            continue
+        value = config.get('config', "genovalue_" + value)
+        try:
+            number = int(value)
+        except:
+            continue
+        if not number in [-1, 0, 1]:
+            continue
         sql = """
-            INSERT INTO GenoXRef
+            INSERT INTO GenoData
             SET
-            GenoXRef.`GenoFreezeId`=%s,
-            GenoXRef.`GenoId`=%s,
-            GenoXRef.`DataId`=%s,
-            GenoXRef.`cM`=%s,
-            GenoXRef.`Used_for_mapping`=%s
+            GenoData.`Id`=%s,
+            GenoData.`StrainId`=%s,
+            GenoData.`value`=%s
             """
-        cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N'))
-        rowcount = cursor.rowcount
-        print "INSERT INTO GenoXRef: %d record" % (rowcount)
-        # for loop next
-        dataid += 1
-        print
-    # release
-    genofile.close()
-    con.close()
+        cursor.execute(sql, (dataid, strainid, number))
+
+def insert_genoxref():
+    sql = """
+        INSERT INTO GenoXRef
+        SET
+        GenoXRef.`GenoFreezeId`=%s,
+        GenoXRef.`GenoId`=%s,
+        GenoXRef.`DataId`=%s,
+        GenoXRef.`cM`=%s,
+        GenoXRef.`Used_for_mapping`=%s
+        """
+    cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N'))
+    rowcount = cursor.rowcount
+    print("INSERT INTO GenoXRef: %d record" % (rowcount))
 
 if __name__ == "__main__":
-    print "command line arguments:\n\t%s" % sys.argv
-    main(sys.argv)
-    print "exit successfully"
+    print("command line arguments:\n\t%s" % sys.argv)
+    main()
+    print("exit successfully")
diff --git a/wqflask/maintenance/dataset/utilities.py b/wqflask/maintenance/dataset/utilities.py
index d389e672..787c9481 100644
--- a/wqflask/maintenance/dataset/utilities.py
+++ b/wqflask/maintenance/dataset/utilities.py
@@ -4,8 +4,8 @@ import ConfigParser
 
 def get_cursor():
     host = 'localhost'
-    user = 'webqtl'
-    passwd = 'webqtl'
+    user = 'gn2'
+    passwd = 'UhHJuiS6gC8hj4a'
     db = 'db_webqtl'
     con = MySQLdb.Connect(db=db, host=host, user=user, passwd=passwd)
     cursor = con.cursor()
diff --git a/wqflask/maintenance/our_settings.py b/wqflask/maintenance/our_settings.py
index 14efe407..b14de960 120000
--- a/wqflask/maintenance/our_settings.py
+++ b/wqflask/maintenance/our_settings.py
@@ -1 +1 @@
-../../../zach_settings.py
\ No newline at end of file
+../../../lei_settings.py
\ No newline at end of file