about | summary | refs | log | tree | commit | diff
path: root/wqflask/maintenance/dataset
diff options
context:
space:
mode:
author: Lei Yan, 2014-02-22 23:59:54 -0600
committer: Lei Yan, 2014-02-22 23:59:54 -0600
commit: 82ac054dc9f0cd2dcc848a75113e0759124c6527 (patch)
tree: d735683695515616031bfb107e371ef139b6fb3f /wqflask/maintenance/dataset
parent: 3e83499ba81888f49228d3f445820d4206e0ea46 (diff)
download: genenetwork2-82ac054dc9f0cd2dcc848a75113e0759124c6527.tar.gz
On branch master
Diffstat (limited to 'wqflask/maintenance/dataset')
-rw-r--r-- wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini 8
-rw-r--r-- wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno 2
-rw-r--r-- wqflask/maintenance/dataset/load_genotypes.py 96
3 files changed, 48 insertions, 58 deletions
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini b/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini
index 7b2fba9d..abff371b 100644
--- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini
+++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini
@@ -1,7 +1,7 @@
[config]
inbredsetid = 1
genofile = datasampledir/load_genotypes/sample.geno
-U = x
-H = 0
-B = -1
-D = 1
+genovalue_U = x
+genovalue_H = 0
+genovalue_B = -1
+genovalue_D = 1
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
index 618e4833..0024ffd1 100644
--- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
+++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno
@@ -5,7 +5,7 @@
@het:H
@unk:U
Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8
-1 rs6269442 0 3.482275 B B D D D
+1 rs6269442 0 3.482275 D B D D D
2 rs6365999 0.3 4.811062 B B D D D
3 rs6376963 0.895 5.008089 B B D D D
4 rs3677817 1.185 5.176058 B B D D D
diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py
index ab3843a4..c5cd35fb 100644
--- a/wqflask/maintenance/dataset/load_genotypes.py
+++ b/wqflask/maintenance/dataset/load_genotypes.py
@@ -48,89 +48,79 @@ def main(argv):
strainnames = line.split()[4:]
strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes")
continue
- # geno line
+ # geno file line
cells = line.split()
chr = cells[0]
locus = cells[1]
cm = cells[2]
mb = cells[3]
values = cells[4:]
- print values
- return
-
-
- print "load %d samples from DB:" % (len(sample_names))
- for i in range(len(sample_names)):
- print "%s\t%s" % (sample_names[i], sample_ids[i])
- # parse geno file
- index = 0
- for line in file_geno:
- index += 1
- if index % 1000 == 0:
- print index
- items = line.split()
- chr = items[0]
- name = items[1]
- cm = items[2]
- mb = items[3]
- values = items[4:]
# geno
sql = """
- SELECT Id
+ SELECT Geno.`Id`
FROM Geno
- WHERE SpeciesId=%s
- AND Name like %s
+ WHERE Geno.`SpeciesId`=%s
+ AND Geno.`Name` like %s
"""
- cursor.execute(sql, (speciesid, name))
- results = cursor.fetchall()
- if results:
- genoid = results[0][0]
+ cursor.execute(sql, (speciesid, locus))
+ result = cursor.fetchone()
+ if result:
+ genoid = result[0]
+ print "get geno record: %d" % genoid
else:
- print "insert geno %s" % (name)
sql = """
INSERT INTO Geno
SET
- SpeciesId=%s,
- Name=%s,
- Marker_Name=%s,
- Chr=%s,
- Mb=%s
+ Geno.`SpeciesId`=%s,
+ Geno.`Name`=%s,
+ Geno.`Marker_Name`=%s,
+ Geno.`Chr`=%s,
+ Geno.`Mb`=%s
"""
- cursor.execute(sql, (speciesid, name, name, chr, mb))
+ cursor.execute(sql, (speciesid, locus, locus, chr, mb))
+ rowcount = cursor.rowcount
genoid = con.insert_id()
+ print "INSERT INTO Geno: %d record: %d" % (rowcount, genoid)
# genodata
- dataid += 1
- for i in range(len(values)):
- sample_id = sample_ids[i]
+ for index, strain in enumerate(strains):
+ strainid = strain[0]
+ value = utilities.to_db_string(values[index], None)
+ if not value:
+ continue
+ value = config.get('config', "genovalue_" + value)
try:
- value = int(values[i])
- except ValueError:
+ number = int(value)
+ except:
continue
- if not value in [-1, 0, 1]:
- print sample_id, value
+ if not number in [-1, 0, 1]:
continue
sql = """
INSERT INTO GenoData
SET
- Id=%s,
- StrainId=%s,
- value=%s
+ GenoData.`Id`=%s,
+ GenoData.`StrainId`=%s,
+ GenoData.`value`=%s
"""
- cursor.execute(sql, (dataid, sample_id, value))
+ cursor.execute(sql, (dataid, strainid, number))
# genoxref
sql = """
INSERT INTO GenoXRef
SET
- GenoFreezeId=%s,
- GenoId=%s,
- DataId=%s,
- cM=%s,
- Used_for_mapping=%s
+ GenoXRef.`GenoFreezeId`=%s,
+ GenoXRef.`GenoId`=%s,
+ GenoXRef.`DataId`=%s,
+ GenoXRef.`cM`=%s,
+ GenoXRef.`Used_for_mapping`=%s
"""
cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N'))
- print "Insert %d genoxref" % (index)
- # close
- file_geno.close()
+ rowcount = cursor.rowcount
+ genoxrefid = con.insert_id()
+ print "INSERT INTO GenoXRef: %d record: %d" % (rowcount, genoxrefid)
+ # for loop next
+ dataid += 1
+ print
+ # release
+ genofile.close()
con.close()
if __name__ == "__main__":