aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLei Yan2014-02-22 23:03:21 -0600
committerLei Yan2014-02-22 23:03:21 -0600
commit1e92d78d574956d88fef648cbde4306d78952626 (patch)
tree73b22bd2bca731ceafb9d08b5cf40d75887109f3
parentaf92bb48b2589d2804569863bc0306f787df1c60 (diff)
downloadgenenetwork2-1e92d78d574956d88fef648cbde4306d78952626.tar.gz
On branch master
-rw-r--r--wqflask/maintenance/dataset/datastructure.py41
-rw-r--r--wqflask/maintenance/dataset/load_genotypes.py68
-rw-r--r--wqflask/maintenance/dataset/load_phenotypes.py2
3 files changed, 39 insertions, 72 deletions
diff --git a/wqflask/maintenance/dataset/datastructure.py b/wqflask/maintenance/dataset/datastructure.py
index e1d88df2..f436d32c 100644
--- a/wqflask/maintenance/dataset/datastructure.py
+++ b/wqflask/maintenance/dataset/datastructure.py
@@ -94,7 +94,8 @@ def get_nextdataid_phenotype():
dataid += 1
return dataid
-def insert_strain(speciesid, strainname):
+def insert_strain(inbredsetid, strainname, updatestrainxref=None):
+ speciesid = get_species(inbredsetid)
cursor, con = utilities.get_cursor()
sql = """
INSERT INTO Strain
@@ -104,8 +105,32 @@ def insert_strain(speciesid, strainname):
Strain.`SpeciesId`=%s
"""
cursor.execute(sql, (strainname, strainname, speciesid))
+ strainid = con.insert_id()
+ if updatestrainxref:
+ sql = """
+ SELECT StrainXRef.`OrderId`
+ FROM StrainXRef
+ where StrainXRef.`InbredSetId`=%s
+ ORDER BY StrainXRef.`OrderId` DESC
+ LIMIT 1
+ """
+ cursor.execute(sql, (inbredsetid))
+ re = cursor.fetchone()
+ orderid = re[0] + 1
+ #
+ sql = """
+ INSERT INTO StrainXRef
+ SET
+ StrainXRef.`InbredSetId`=%s,
+ StrainXRef.`StrainId`=%s,
+ StrainXRef.`OrderId`=%s,
+ StrainXRef.`Used_for_mapping`=%s,
+ StrainXRef.`PedigreeStatus`=%s
+ """
+ cursor.execute(sql, (inbredsetid, strainid, orderid, "N", None))
-def get_strain(speciesid, strainname):
+def get_strain(inbredsetid, strainname):
+ speciesid = get_species(inbredsetid)
cursor, con = utilities.get_cursor()
sql = """
SELECT Strain.`Id`, Strain.`Name`
@@ -116,15 +141,15 @@ def get_strain(speciesid, strainname):
cursor.execute(sql, (speciesid, strainname))
return cursor.fetchone()
-def get_strain_sure(speciesid, strainname):
- strain = get_strain(speciesid, strainname)
+def get_strain_sure(inbredsetid, strainname, updatestrainxref=None):
+ strain = get_strain(inbredsetid, strainname)
if not strain:
- insert_strain(speciesid, strainname)
- strain = get_strain(speciesid, strainname)
+ insert_strain(inbredsetid, strainname, updatestrainxref)
+ strain = get_strain(inbredsetid, strainname)
return strain
-def get_strains_bynames(speciesid, strainnames):
+def get_strains_bynames(inbredsetid, strainnames, updatestrainxref=None):
strains = []
for strainname in strainnames:
- strains.append(get_strain_sure(speciesid, strainname))
+ strains.append(get_strain_sure(inbredsetid, strainname, updatestrainxref))
return strains
diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py
index 0e35fc24..ab3843a4 100644
--- a/wqflask/maintenance/dataset/load_genotypes.py
+++ b/wqflask/maintenance/dataset/load_genotypes.py
@@ -19,6 +19,9 @@ def main(argv):
genofreeze = datastructure.get_genofreeze_byinbredsetid(inbredsetid)
genofreezeid = genofreeze[0]
print "genofreezeid: %s" % genofreezeid
+ dataid = datastructure.get_nextdataid_genotype()
+ print "next data id: %s" % dataid
+ cursor, con = utilities.get_cursor()
genofile = open(config.get('config', 'genofile'), 'r')
metadic = {}
# parse genofile
@@ -43,7 +46,7 @@ def main(argv):
#
print "geno file head:\n\t%s" % line
strainnames = line.split()[4:]
- strains = datastructure.get_strains_bynames(speciesid, strainnames)
+ strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes")
continue
# geno line
cells = line.split()
@@ -55,68 +58,7 @@ def main(argv):
print values
return
- sql = """
- SELECT Id
- FROM GenoData
- ORDER BY Id DESC
- LIMIT 1
- """
- cursor.execute(sql)
- results = cursor.fetchall()
- dataid = results[0][0]
- print "speciesid: %s" % (speciesid)
- print "inbredsetid: %s" % (inbredsetid)
- print "genofreezeid: %s" % (genofreezeid)
- print "dataid start: %s" % (dataid+1)
- # samples
- line = file_geno.readline()
- sample_names = line.split()[4:]
- sample_ids = []
- print "get %d samples from file:\n%s" % (len(sample_names), sample_names)
- for sample_name in sample_names:
- sql = """
- select Id
- from Strain
- where SpeciesId=%s
- and Name like %s
- """
- cursor.execute(sql, (speciesid, sample_name))
- results = cursor.fetchall()
- if results:
- sample_ids.append(results[0][0])
- else:
- print "insert sample %s" % (sample_name)
- sql = """
- INSERT INTO Strain
- SET
- SpeciesId=%s,
- Name=%s,
- Name2=%s
- """
- cursor.execute(sql, (speciesid, sample_name, sample_name))
- sampleid = con.insert_id()
- sample_ids.append(sampleid)
- #
- sql = """
- SELECT OrderId
- FROM StrainXRef
- where InbredSetId=%s
- ORDER BY OrderId DESC
- LIMIT 1
- """
- cursor.execute(sql, (inbredsetid))
- results = cursor.fetchall()
- orderid = results[0][0] + 1
- #
- sql = """
- INSERT INTO StrainXRef
- SET
- InbredSetId=%s,
- StrainId=%s,
- OrderId=%s,
- Used_for_mapping=%s
- """
- cursor.execute(sql, (inbredsetid, sampleid, orderid, "N"))
+
print "load %d samples from DB:" % (len(sample_names))
for i in range(len(sample_names)):
print "%s\t%s" % (sample_names[i], sample_ids[i])
diff --git a/wqflask/maintenance/dataset/load_phenotypes.py b/wqflask/maintenance/dataset/load_phenotypes.py
index 4e7a5414..27e340f8 100644
--- a/wqflask/maintenance/dataset/load_phenotypes.py
+++ b/wqflask/maintenance/dataset/load_phenotypes.py
@@ -25,7 +25,7 @@ def main(argv):
phenotypedata_head = phenotypedata.next()
print "phenotypedata head:\n\t%s" % phenotypedata_head
strainnames = phenotypedata_head[1:]
- strains = datastructure.get_strains_bynames(speciesid, strainnames)
+ strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref=None)
# metafile
metafile = open(config.get('config', 'metafile'), 'r')
phenotypemeta = csv.reader(metafile, delimiter='\t', quotechar='"')