about summary refs log tree commit diff
diff options
context:
space:
mode:
author Lei Yan 2014-02-22 23:59:54 -0600
committer Lei Yan 2014-02-22 23:59:54 -0600
commit 82ac054dc9f0cd2dcc848a75113e0759124c6527 (patch)
tree d735683695515616031bfb107e371ef139b6fb3f
parent 3e83499ba81888f49228d3f445820d4206e0ea46 (diff)
download genenetwork2-82ac054dc9f0cd2dcc848a75113e0759124c6527.tar.gz
On branch master
-rw-r--r-- wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini 8
-rw-r--r-- wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno 2
-rw-r--r-- wqflask/maintenance/dataset/load_genotypes.py 96
3 files changed, 48 insertions, 58 deletions
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini b/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini
index 7b2fba9d..abff371b 100644
--- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini
+++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini
@@ -1,7 +1,7 @@
 [config]
 inbredsetid = 1
 genofile = datasampledir/load_genotypes/sample.geno
-U = x
-H = 0
-B = -1
-D = 1
+genovalue_U = x
+genovalue_H = 0
+genovalue_B = -1
+genovalue_D = 1
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
index 618e4833..0024ffd1 100644
--- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
+++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
@@ -5,7 +5,7 @@
 @het:H
 @unk:U
 Chr	Locus	cM	Mb	BXD1	BXD2	BXD5	BXD6	BXD8
-1	rs6269442	0	3.482275	B	B	D	D	D
+1	rs6269442	0	3.482275	D	B	D	D	D
 2	rs6365999	0.3	4.811062	B	B	D	D	D
 3	rs6376963	0.895	5.008089	B	B	D	D	D
 4	rs3677817	1.185	5.176058	B	B	D	D	D
diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py
index ab3843a4..c5cd35fb 100644
--- a/wqflask/maintenance/dataset/load_genotypes.py
+++ b/wqflask/maintenance/dataset/load_genotypes.py
@@ -48,89 +48,79 @@ def main(argv):
             strainnames = line.split()[4:]
             strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes")
             continue
-        # geno line
+        # geno file line
         cells = line.split()
         chr = cells[0]
         locus = cells[1]
         cm = cells[2]
         mb = cells[3]
         values = cells[4:]
-        print values
-    return
-
-            
-    print "load %d samples from DB:" % (len(sample_names))
-    for i in range(len(sample_names)):
-        print "%s\t%s" % (sample_names[i], sample_ids[i])
-    # parse geno file
-    index = 0
-    for line in file_geno:
-        index += 1
-        if index % 1000 == 0:
-            print index
-        items = line.split()
-        chr = items[0]
-        name = items[1]
-        cm = items[2]
-        mb = items[3]
-        values = items[4:]
         # geno
         sql = """
-            SELECT Id
+            SELECT Geno.`Id`
             FROM Geno
-            WHERE SpeciesId=%s
-            AND Name like %s
+            WHERE Geno.`SpeciesId`=%s
+            AND Geno.`Name` like %s
             """
-        cursor.execute(sql, (speciesid, name))
-        results = cursor.fetchall()
-        if results:
-            genoid = results[0][0]
+        cursor.execute(sql, (speciesid, locus))
+        result = cursor.fetchone()
+        if result:
+            genoid = result[0]
+            print "get geno record: %d" % genoid
         else:
-            print "insert geno %s" % (name)
             sql = """
                 INSERT INTO Geno
                 SET
-                    SpeciesId=%s,
-                    Name=%s,
-                    Marker_Name=%s,
-                    Chr=%s,
-                    Mb=%s
+                Geno.`SpeciesId`=%s,
+                Geno.`Name`=%s,
+                Geno.`Marker_Name`=%s,
+                Geno.`Chr`=%s,
+                Geno.`Mb`=%s
                 """
-            cursor.execute(sql, (speciesid, name, name, chr, mb))
+            cursor.execute(sql, (speciesid, locus, locus, chr, mb))
+            rowcount = cursor.rowcount
             genoid = con.insert_id()
+            print "INSERT INTO Geno: %d record: %d" % (rowcount, genoid)
         # genodata
-        dataid += 1
-        for i in range(len(values)):
-            sample_id = sample_ids[i]
+        for index, strain in enumerate(strains):
+            strainid = strain[0]
+            value = utilities.to_db_string(values[index], None)
+            if not value:
+                continue
+            value = config.get('config', "genovalue_" + value)
             try:
-                value = int(values[i])
-            except ValueError:
+                number = int(value)
+            except:
                 continue
-            if not value in [-1, 0, 1]:
-                print sample_id, value
+            if not number in [-1, 0, 1]:
                 continue
             sql = """
                 INSERT INTO GenoData
                 SET
-                    Id=%s,
-                    StrainId=%s,
-                    value=%s
+                GenoData.`Id`=%s,
+                GenoData.`StrainId`=%s,
+                GenoData.`value`=%s
                 """
-            cursor.execute(sql, (dataid, sample_id, value))
+            cursor.execute(sql, (dataid, strainid, number))
         # genoxref
         sql = """
             INSERT INTO GenoXRef
             SET
-                GenoFreezeId=%s,
-                GenoId=%s,
-                DataId=%s,
-                cM=%s,
-                Used_for_mapping=%s
+            GenoXRef.`GenoFreezeId`=%s,
+            GenoXRef.`GenoId`=%s,
+            GenoXRef.`DataId`=%s,
+            GenoXRef.`cM`=%s,
+            GenoXRef.`Used_for_mapping`=%s
             """
         cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N'))
-    print "Insert %d genoxref" % (index)
-    # close
-    file_geno.close()
+        rowcount = cursor.rowcount
+        genoxrefid = con.insert_id()
+        print "INSERT INTO GenoXRef: %d record: %d" % (rowcount, genoxrefid)
+        # for loop next
+        dataid += 1
+        print
+    # release
+    genofile.close()
     con.close()
 
 if __name__ == "__main__":