diff options
Diffstat (limited to 'wqflask/maintenance')
3 files changed, 48 insertions, 58 deletions
diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini b/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini index 7b2fba9d..abff371b 100644 --- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini +++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/config.ini @@ -1,7 +1,7 @@ [config] inbredsetid = 1 genofile = datasampledir/load_genotypes/sample.geno -U = x -H = 0 -B = -1 -D = 1 +genovalue_U = x +genovalue_H = 0 +genovalue_B = -1 +genovalue_D = 1 diff --git a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno index 618e4833..0024ffd1 100644 --- a/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno +++ b/wqflask/maintenance/dataset/datasampledir/load_genotypes/sample.geno @@ -5,7 +5,7 @@ @het:H @unk:U Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8 -1 rs6269442 0 3.482275 B B D D D +1 rs6269442 0 3.482275 D B D D D 2 rs6365999 0.3 4.811062 B B D D D 3 rs6376963 0.895 5.008089 B B D D D 4 rs3677817 1.185 5.176058 B B D D D diff --git a/wqflask/maintenance/dataset/load_genotypes.py b/wqflask/maintenance/dataset/load_genotypes.py index ab3843a4..c5cd35fb 100644 --- a/wqflask/maintenance/dataset/load_genotypes.py +++ b/wqflask/maintenance/dataset/load_genotypes.py @@ -48,89 +48,79 @@ def main(argv): strainnames = line.split()[4:] strains = datastructure.get_strains_bynames(inbredsetid=inbredsetid, strainnames=strainnames, updatestrainxref="yes") continue - # geno line + # geno file line cells = line.split() chr = cells[0] locus = cells[1] cm = cells[2] mb = cells[3] values = cells[4:] - print values - return - - - print "load %d samples from DB:" % (len(sample_names)) - for i in range(len(sample_names)): - print "%s\t%s" % (sample_names[i], sample_ids[i]) - # parse geno file - index = 0 - for line in file_geno: - index += 1 - if index % 1000 == 0: - print index - items = line.split() - chr = items[0] - name = items[1] - cm = items[2] - mb = items[3] - values = items[4:] # geno sql = """ - SELECT Id + SELECT Geno.`Id` FROM Geno - WHERE SpeciesId=%s - AND Name like %s + WHERE Geno.`SpeciesId`=%s + AND Geno.`Name` like %s """ - cursor.execute(sql, (speciesid, name)) - results = cursor.fetchall() - if results: - genoid = results[0][0] + cursor.execute(sql, (speciesid, locus)) + result = cursor.fetchone() + if result: + genoid = result[0] + print "get geno record: %d" % genoid else: - print "insert geno %s" % (name) sql = """ INSERT INTO Geno SET - SpeciesId=%s, - Name=%s, - Marker_Name=%s, - Chr=%s, - Mb=%s + Geno.`SpeciesId`=%s, + Geno.`Name`=%s, + Geno.`Marker_Name`=%s, + Geno.`Chr`=%s, + Geno.`Mb`=%s """ - cursor.execute(sql, (speciesid, name, name, chr, mb)) + cursor.execute(sql, (speciesid, locus, locus, chr, mb)) + rowcount = cursor.rowcount genoid = con.insert_id() + print "INSERT INTO Geno: %d record: %d" % (rowcount, genoid) # genodata - dataid += 1 - for i in range(len(values)): - sample_id = sample_ids[i] + for index, strain in enumerate(strains): + strainid = strain[0] + value = utilities.to_db_string(values[index], None) + if not value: + continue + value = config.get('config', "genovalue_" + value) try: - value = int(values[i]) - except ValueError: + number = int(value) + except: continue - if not value in [-1, 0, 1]: - print sample_id, value + if not number in [-1, 0, 1]: continue sql = """ INSERT INTO GenoData SET - Id=%s, - StrainId=%s, - value=%s + GenoData.`Id`=%s, + GenoData.`StrainId`=%s, + GenoData.`value`=%s """ - cursor.execute(sql, (dataid, sample_id, value)) + cursor.execute(sql, (dataid, strainid, number)) # genoxref sql = """ INSERT INTO GenoXRef SET - GenoFreezeId=%s, - GenoId=%s, - DataId=%s, - cM=%s, - Used_for_mapping=%s + GenoXRef.`GenoFreezeId`=%s, + GenoXRef.`GenoId`=%s, + GenoXRef.`DataId`=%s, + GenoXRef.`cM`=%s, + GenoXRef.`Used_for_mapping`=%s """ cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N')) - print "Insert %d genoxref" % (index) - # close - file_geno.close() + rowcount = cursor.rowcount + genoxrefid = con.insert_id() + print "INSERT INTO GenoXRef: %d record: %d" % (rowcount, genoxrefid) + # for loop next + dataid += 1 + print + # release + genofile.close() con.close() if __name__ == "__main__": |