From a260db93da548ce7511521c5f63c03c3d279bcb1 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Mon, 10 Feb 2014 14:59:27 -0600 Subject: On branch master --- wqflask/maintenance/dataset/specials.py | 129 ------------------------------- wqflask/maintenance/dataset/specials1.py | 53 +++++++++++++ wqflask/maintenance/dataset/specials2.py | 109 ++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 129 deletions(-) delete mode 100644 wqflask/maintenance/dataset/specials.py create mode 100644 wqflask/maintenance/dataset/specials1.py create mode 100644 wqflask/maintenance/dataset/specials2.py diff --git a/wqflask/maintenance/dataset/specials.py b/wqflask/maintenance/dataset/specials.py deleted file mode 100644 index 4ff85333..00000000 --- a/wqflask/maintenance/dataset/specials.py +++ /dev/null @@ -1,129 +0,0 @@ -import utilities -import datastructure -import genotypes -import probesets -import calculate - -""" -For: Rob, GeneNetwork -Date: 2014-02-04 -Function: - For BXD group, fetch probesets with given locus (mapping info). - -locus="rs3663871" -""" -def bxd_probesets_locus(locus): - # - inbredsetid=1 - # - file = open('probesets_%s.txt' % (locus), 'w+') - file.write("GN Dataset ID\t") - file.write("Dataset Full Name\t") - file.write("ProbeSet Name\t") - file.write("Symbol\t") - file.write("ProbeSet Description\t") - file.write("Probe Target Description\t") - file.write("ProbeSet Chr\t") - file.write("ProbeSet Mb\t") - file.write("Mean\t") - file.write("LRS\t") - file.write("Geno Chr\t") - file.write("Geno Mb\t") - file.write("\n") - file.flush() - # - results = get_normalized_probeset(locus=locus, inbredsetid=inbredsetid) - for row in results: - file.write("%s\t" % (row[0])) - file.write("%s\t" % (utilities.clearspaces(row[2], default=''))) - file.write("%s\t" % (utilities.clearspaces(row[3], default=''))) - file.write("%s\t" % (utilities.clearspaces(row[4], default=''))) - file.write("%s\t" % (utilities.clearspaces(row[5], default=''))) - file.write("%s\t" % (utilities.clearspaces(row[6], default=''))) - file.write("%s\t" % (utilities.clearspaces(row[7], default=''))) - file.write("%s\t" % (row[8])) - file.write("%s\t" % (row[9])) - file.write("%s\t" % (row[10])) - file.write("%s\t" % (utilities.clearspaces(row[11], default=''))) - file.write("%s\t" % (row[12])) - file.write('\n') - file.flush() - file.close() - -""" -For: Ash -Date: 2014-02-05 -Function: - For BXD group, calculate correlations with genotypes and probesets. -Running History: - 2014-02-05 /home/leiyan/gn2/wqflask/maintenance/dataset/datadir/20140205_Ash_correlations/output -""" -def bxd_correlations(): - # - inbredsetid = 1 - genofile = "/home/leiyan/gn/web/genotypes/BXD.geno" - outputdir = "/home/leiyan/gn2/wqflask/maintenance/dataset/datadir/20140205_Ash_correlations/output" - # - t = genotypes.load_genos(genofile) - genostrains = t[0] - genos = t[1] - print "From geno file, get %d strains" % (len(genostrains)) - print "From geno file, get %d genos" % (len(genos)) - # - probesetfreezes = datastructure.get_probesetfreezes(inbredsetid) - print "From DB, get %d probesetfreezes" % (len(probesetfreezes)) - # - for probesetfreeze in probesetfreezes: - # - print probesetfreeze - probesetfreezeid = probesetfreeze[0] - probesetfreezename = probesetfreeze[1] - probesetfreezefullname = probesetfreeze[2] - # - outputfile = open("%s/%d_%s.txt" % (outputdir, probesetfreezeid, probesetfreezename), "w+") - outputfile.write("%s\t" % "ProbeSet Id") - outputfile.write("%s\t" % "ProbeSet Name") - outputfile.write("%s\t" % "Geno Name") - outputfile.write("%s\t" % "Overlap Number") - outputfile.write("%s\t" % "Pearson r") - outputfile.write("%s\t" % "Pearson p") - outputfile.write("%s\t" % "Spearman r") - outputfile.write("%s\t" % "Spearman p") - outputfile.write("\n") - outputfile.flush() - # - probesetxrefs = probesets.get_probesetxref(probesetfreezeid) - print "Get %d probesetxrefs" % (len(probesetxrefs)) - # - for probesetxref in probesetxrefs: - # - probesetid = probesetxref[0] - probesetdataid = probesetxref[1] - probeset = probesets.get_probeset(probesetid) - probesetname = probeset[1] - probesetdata = probesets.get_probesetdata(probesetdataid) - probesetdata = zip(*probesetdata) - probesetdata = utilities.to_dic([strain.lower() for strain in probesetdata[1]], probesetdata[2]) - # - for geno in genos: - genoname = geno['locus'] - outputfile.write("%s\t" % probesetid) - outputfile.write("%s\t" % probesetname) - outputfile.write("%s\t" % genoname) - # - dic1 = geno['dicvalues'] - dic2 = probesetdata - keys, values1, values2 = utilities.overlap(dic1, dic2) - rs = calculate.correlation(values1, values2) - # - outputfile.write("%s\t" % len(keys)) - outputfile.write("%s\t" % rs[0][0]) - outputfile.write("%s\t" % rs[0][1]) - outputfile.write("%s\t" % rs[1][0]) - outputfile.write("%s\t" % rs[1][1]) - outputfile.write("\n") - outputfile.flush() - # - outputfile.close() - -bxd_correlations() diff --git a/wqflask/maintenance/dataset/specials1.py b/wqflask/maintenance/dataset/specials1.py new file mode 100644 index 00000000..9159fd7f --- /dev/null +++ b/wqflask/maintenance/dataset/specials1.py @@ -0,0 +1,53 @@ +import utilities +import datastructure +import genotypes +import probesets +import calculate + +""" +For: Rob, GeneNetwork +Date: 2014-02-04 +Function: + For BXD group, fetch probesets with given locus (mapping info). + +locus="rs3663871" +""" +def bxd_probesets_locus(locus, inbredsetid): + # + file = open('probesets_%s.txt' % (locus), 'w+') + file.write("GN Dataset ID\t") + file.write("Dataset Full Name\t") + file.write("ProbeSet Name\t") + file.write("Symbol\t") + file.write("ProbeSet Description\t") + file.write("Probe Target Description\t") + file.write("ProbeSet Chr\t") + file.write("ProbeSet Mb\t") + file.write("Mean\t") + file.write("LRS\t") + file.write("Geno Chr\t") + file.write("Geno Mb\t") + file.write("\n") + file.flush() + # + results = probesets.get_normalized_probeset(locus=locus, inbredsetid=inbredsetid) + for row in results: + file.write("%s\t" % (row[0])) + file.write("%s\t" % (utilities.clearspaces(row[2], default=''))) + file.write("%s\t" % (utilities.clearspaces(row[3], default=''))) + file.write("%s\t" % (utilities.clearspaces(row[4], default=''))) + file.write("%s\t" % (utilities.clearspaces(row[5], default=''))) + file.write("%s\t" % (utilities.clearspaces(row[6], default=''))) + file.write("%s\t" % (utilities.clearspaces(row[7], default=''))) + file.write("%s\t" % (row[8])) + file.write("%s\t" % (row[9])) + file.write("%s\t" % (row[10])) + file.write("%s\t" % (utilities.clearspaces(row[11], default=''))) + file.write("%s\t" % (row[12])) + file.write('\n') + file.flush() + file.close() + +locus='rs3663871' +inbredsetid=1 +bxd_probesets_locus(locus=locus, inbredsetid=inbredsetid) diff --git a/wqflask/maintenance/dataset/specials2.py b/wqflask/maintenance/dataset/specials2.py new file mode 100644 index 00000000..2acfd2ef --- /dev/null +++ b/wqflask/maintenance/dataset/specials2.py @@ -0,0 +1,109 @@ +import utilities +import datastructure +import genotypes +import probesets +import calculate + +""" +For: Ash +Date: 2014-02-07 +Function: + For BXD group, get a probesetfreeze name list. +""" +def probesetfreeze_list(): + # + inbredsetid = 1 + outputdir = "/home/leiyan/gn2/wqflask/maintenance/dataset/datadir/20140205_Ash_correlations/output" + # + probesetfreezes = datastructure.get_probesetfreezes(inbredsetid) + print "From DB, get %d probesetfreezes" % (len(probesetfreezes)) + file = open(outputdir + '/' + 'probesetfreezes.txt', 'w+') + # + for probesetfreeze in probesetfreezes: + # + print probesetfreeze + probesetfreezeid = probesetfreeze[0] + probesetfreezename = probesetfreeze[1] + probesetfreezefullname = probesetfreeze[2] + # + file.write("%s\t" % probesetfreezeid) + file.write("%s" % probesetfreezefullname) + file.write("\n") + file.flush() + # + file.close() + +""" +For: Ash +Date: 2014-02-05 +Function: + For BXD group, calculate correlations with genotypes and probesets. +""" +def bxd_correlations(): + # + inbredsetid = 1 + genofile = "/home/leiyan/gn/web/genotypes/BXD.geno" + outputdir = "/home/leiyan/gn2/wqflask/maintenance/dataset/datadir/20140205_Ash_correlations/output" + # + t = genotypes.load_genos(genofile) + genostrains = t[0] + genos = t[1] + print "From geno file, get %d strains" % (len(genostrains)) + print "From geno file, get %d genos" % (len(genos)) + # + probesetfreezes = datastructure.get_probesetfreezes(inbredsetid) + print "From DB, get %d probesetfreezes" % (len(probesetfreezes)) + for probesetfreeze in probesetfreezes: + correlations(genos, probesetfreeze) + +def correlations(genos, probesetfreeze): + print probesetfreeze + probesetfreezeid = probesetfreeze[0] + probesetfreezename = probesetfreeze[1] + probesetfreezefullname = probesetfreeze[2] + # + outputfile = open("%s/%d_%s.txt" % (outputdir, probesetfreezeid, probesetfreezename), "w+") + outputfile.write("%s\t" % "ProbeSet Id") + outputfile.write("%s\t" % "ProbeSet Name") + outputfile.write("%s\t" % "Geno Name") + outputfile.write("%s\t" % "Overlap Number") + outputfile.write("%s\t" % "Pearson r") + outputfile.write("%s\t" % "Pearson p") + outputfile.write("%s\t" % "Spearman r") + outputfile.write("%s\t" % "Spearman p") + outputfile.write("\n") + outputfile.flush() + # + probesetxrefs = probesets.get_probesetxref(probesetfreezeid) + print "Get %d probesetxrefs" % (len(probesetxrefs)) + # + for probesetxref in probesetxrefs: + # + probesetid = probesetxref[0] + probesetdataid = probesetxref[1] + probeset = probesets.get_probeset(probesetid) + probesetname = probeset[1] + probesetdata = probesets.get_probesetdata(probesetdataid) + probesetdata = zip(*probesetdata) + probesetdata = utilities.to_dic([strain.lower() for strain in probesetdata[1]], probesetdata[2]) + # + for geno in genos: + genoname = geno['locus'] + outputfile.write("%s\t" % probesetid) + outputfile.write("%s\t" % probesetname) + outputfile.write("%s\t" % genoname) + # + dic1 = geno['dicvalues'] + dic2 = probesetdata + keys, values1, values2 = utilities.overlap(dic1, dic2) + rs = calculate.correlation(values1, values2) + # + outputfile.write("%s\t" % len(keys)) + outputfile.write("%s\t" % rs[0][0]) + outputfile.write("%s\t" % rs[0][1]) + outputfile.write("%s\t" % rs[1][0]) + outputfile.write("%s\t" % rs[1][1]) + outputfile.write("\n") + outputfile.flush() + # + outputfile.close() \ No newline at end of file -- cgit v1.2.3