From 1e92d78d574956d88fef648cbde4306d78952626 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Sat, 22 Feb 2014 23:03:21 -0600 Subject: On branch master --- wqflask/maintenance/dataset/datastructure.py | 41 +++++++++++++--- wqflask/maintenance/dataset/load_genotypes.py | 68 ++------------------------ wqflask/maintenance/dataset/load_phenotypes.py | 2 +- 3 files changed, 39 insertions(+), 72 deletions(-) (limited to 'wqflask') diff --git a/wqflask/maintenance/dataset/datastructure.py b/wqflask/maintenance/dataset/datastructure.py index e1d88df2..f436d32c 100644 --- a/wqflask/maintenance/dataset/datastructure.py +++ b/wqflask/maintenance/dataset/datastructure.py @@ -94,7 +94,8 @@ def get_nextdataid_phenotype(): dataid += 1 return dataid -def insert_strain(speciesid, strainname): +def insert_strain(inbredsetid, strainname, updatestrainxref=None): + speciesid = get_species(inbredsetid) cursor, con = utilities.get_cursor() sql = """ INSERT INTO Strain @@ -104,8 +105,32 @@ def insert_strain(speciesid, strainname): Strain.`SpeciesId`=%s """ cursor.execute(sql, (strainname, strainname, speciesid)) + strainid = con.insert_id() + if updatestrainxref: + sql = """ + SELECT StrainXRef.`OrderId` + FROM StrainXRef + where StrainXRef.`InbredSetId`=%s + ORDER BY StrainXRef.`OrderId` DESC + LIMIT 1 + """ + cursor.execute(sql, (inbredsetid)) + re = cursor.fetchone() + orderid = re[0] + 1 + # + sql = """ + INSERT INTO StrainXRef + SET + StrainXRef.`InbredSetId`=%s, + StrainXRef.`StrainId`=%s, + StrainXRef.`OrderId`=%s, + StrainXRef.`Used_for_mapping`=%s, + StrainXRef.`PedigreeStatus`=%s + """ + cursor.execute(sql, (inbredsetid, strainid, orderid, "N", None)) -def get_strain(speciesid, strainname): +def get_strain(inbredsetid, strainname): + speciesid = get_species(inbredsetid) cursor, con = utilities.get_cursor() sql = """ SELECT Strain.`Id`, Strain.`Name` @@ -116,15 +141,15 @@ def get_strain(speciesid, strainname): cursor.execute(sql, (speciesid, strainname)) return cursor.fetchone() -def get_strain_sure(speciesid, strainname): - strain = get_strain(speciesid, strainname) +def get_strain_sure(inbredsetid, strainname, updatestrainxref=None): + strain = get_strain(inbredsetid, strainname) if not strain: - insert_strain(speciesid, strainname) - strain = get_strain(speciesid, strainname) + insert_strain(inbredsetid, strainname, updatestrainxref) + strain = get_strain(inbredsetid, strainname) return strain -def get_strains_bynames(speciesid, strainnames): +def get_strains_bynames(inbredsetid, strainnames, updatestrainxref=None): strains = [] for strainname in strainnames: - strains.append(get_strain_sure(speciesid, strainname)) + strains.append(get_strain_sure(inbredsetid, strainname, updatestrainxref)) return strains diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py index 0e35fc24..ab3843a4 100644 --- a/wqflask/maintenance/dataset/load_genotypes.py +++ b/wqflask/maintenance/dataset/load_genotypes.py @@ -19,6 +19,9 @@ def main(argv): genofreeze = datastructure.get_genofreeze_byinbredsetid(inbredsetid) genofreezeid = genofreeze[0] print "genofreezeid: %s" % genofreezeid + dataid = datastructure.get_nextdataid_genotype() + print "next data id: %s" % dataid + cursor, con = utilities.get_cursor() genofile = open(config.get('config', 'genofile'), 'r') metadic = {} # parse genofile @@ -43,7 +46,7 @@ def main(argv): # print "geno file head:\n\t%s" % line strainnames = line.split()[4:] - strains = datastructure.get_strains_bynames(speciesid, strainnames) + strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes") continue # geno line cells = line.split() @@ -55,68 +58,7 @@ def main(argv): print values return - sql = """ - SELECT Id - FROM GenoData - ORDER BY Id DESC - LIMIT 1 - """ - cursor.execute(sql) - results = cursor.fetchall() - dataid = results[0][0] - print "speciesid: %s" % (speciesid) - print "inbredsetid: %s" % (inbredsetid) - print "genofreezeid: %s" % (genofreezeid) - print "dataid start: %s" % (dataid+1) - # samples - line = file_geno.readline() - sample_names = line.split()[4:] - sample_ids = [] - print "get %d samples from file:\n%s" % (len(sample_names), sample_names) - for sample_name in sample_names: - sql = """ - select Id - from Strain - where SpeciesId=%s - and Name like %s - """ - cursor.execute(sql, (speciesid, sample_name)) - results = cursor.fetchall() - if results: - sample_ids.append(results[0][0]) - else: - print "insert sample %s" % (sample_name) - sql = """ - INSERT INTO Strain - SET - SpeciesId=%s, - Name=%s, - Name2=%s - """ - cursor.execute(sql, (speciesid, sample_name, sample_name)) - sampleid = con.insert_id() - sample_ids.append(sampleid) - # - sql = """ - SELECT OrderId - FROM StrainXRef - where InbredSetId=%s - ORDER BY OrderId DESC - LIMIT 1 - """ - cursor.execute(sql, (inbredsetid)) - results = cursor.fetchall() - orderid = results[0][0] + 1 - # - sql = """ - INSERT INTO StrainXRef - SET - InbredSetId=%s, - StrainId=%s, - OrderId=%s, - Used_for_mapping=%s - """ - cursor.execute(sql, (inbredsetid, sampleid, orderid, "N")) + print "load %d samples from DB:" % (len(sample_names)) for i in range(len(sample_names)): print "%s\t%s" % (sample_names[i], sample_ids[i]) diff --git a/wqflask/maintenance/dataset/load_phenotypes.py b/wqflask/maintenance/dataset/load_phenotypes.py index 4e7a5414..27e340f8 100644 --- a/wqflask/maintenance/dataset/load_phenotypes.py +++ b/wqflask/maintenance/dataset/load_phenotypes.py @@ -25,7 +25,7 @@ def main(argv): phenotypedata_head = phenotypedata.next() print "phenotypedata head:\n\t%s" % phenotypedata_head strainnames = phenotypedata_head[1:] - strains = datastructure.get_strains_bynames(speciesid, strainnames) + strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref=None) # metafile metafile = open(config.get('config', 'metafile'), 'r') phenotypemeta = csv.reader(metafile, delimiter='\t', quotechar='"') -- cgit v1.2.3