diff options
author | Zachary Sloan | 2014-07-21 16:07:42 +0000 |
---|---|---|
committer | Zachary Sloan | 2014-07-21 16:07:42 +0000 |
commit | d952a23662eb4c46041be3945b5c3ccacf5506b6 (patch) | |
tree | 00750c5c88fb7dc6f300a148ea40f95a7b46be3a /wqflask/maintenance/dataset/specials3.py | |
parent | 818de422631392c246646b52a5b227d23153e667 (diff) | |
parent | c424db452c243c6f0f64ee58d2d7baeb147dd3c8 (diff) | |
download | genenetwork2-d952a23662eb4c46041be3945b5c3ccacf5506b6.tar.gz |
Merge /home/lei/gene
Diffstat (limited to 'wqflask/maintenance/dataset/specials3.py')
-rw-r--r-- | wqflask/maintenance/dataset/specials3.py | 117 |
1 files changed, 117 insertions, 0 deletions
"""Maintenance helpers that dump probeset data to tab-separated text files.

Contents of wqflask/maintenance/dataset/specials3.py.

NOTE: the bottom of this module calls generate_probesets() with hard-coded
paths, so running *or importing* it starts an export immediately.
"""

import utilities
import datastructure
import genotypes
import probesets
import calculate


def _load_probeset(probesetxref):
    """Fetch the name and per-strain data for one probeset cross-reference.

    probesetxref -- indexable row; [0] is the probeset id and [1] is the
                    probeset data id.

    Returns a (probesetid, probesetname, probesetdata) tuple, where
    probesetdata is whatever utilities.to_dic() builds from the lower-cased
    entries of data column 1 and the entries of data column 2 (presumably a
    strain-name -> value dict; callers use ``in`` and ``[]`` on it).
    """
    probesetid = probesetxref[0]
    probesetdataid = probesetxref[1]
    probesetname = probesets.get_probeset(probesetid)[1]
    # Transpose the data rows into columns.  list() keeps the result
    # indexable even where zip() returns a lazy iterator.
    columns = list(zip(*probesets.get_probesetdata(probesetdataid)))
    strainnames = [strain.lower() for strain in columns[1]]
    return probesetid, probesetname, utilities.to_dic(strainnames, columns[2])


def correlations(outputdir, genos, probesetfreeze):
    """Write genotype/probeset correlations for one probeset freeze.

    Creates "<outputdir>/<freeze id>_<freeze name>.txt" containing a header
    line followed by one tab-separated line per (probeset, genotype) pair.
    Every field, including the last one, is followed by a tab.

    outputdir      -- directory the output file is created in.
    genos          -- iterable of mappings providing 'locus' (the name that
                      is written out) and 'dicvalues' (passed to
                      utilities.overlap together with the probeset data).
    probesetfreeze -- indexable row; [0] is the numeric freeze id and [1]
                      the freeze name.
    """
    print(probesetfreeze)
    probesetfreezeid = probesetfreeze[0]
    probesetfreezename = probesetfreeze[1]
    titles = (
        "ProbeSet Id",
        "ProbeSet Name",
        "Geno Name",
        "Overlap Number",
        "Pearson r",
        "Pearson p",
        "Spearman r",
        "Spearman p",
    )
    path = "%s/%d_%s.txt" % (outputdir, probesetfreezeid, probesetfreezename)
    # "with" guarantees the file is closed even if a lookup or the
    # correlation computation raises part-way through.
    with open(path, "w+") as outputfile:
        outputfile.write("".join(["%s\t" % title for title in titles]) + "\n")
        outputfile.flush()
        #
        probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
        print("Get %d probesetxrefs" % (len(probesetxrefs)))
        #
        for probesetxref in probesetxrefs:
            probesetid, probesetname, probesetdata = _load_probeset(probesetxref)
            for geno in genos:
                genoname = geno['locus']
                keys, values1, values2 = utilities.overlap(geno['dicvalues'], probesetdata)
                rs = calculate.correlation(values1, values2)
                # rs[0] is written under the Pearson columns and rs[1] under
                # the Spearman columns, each as (r, p).
                fields = (
                    probesetid,
                    probesetname,
                    genoname,
                    len(keys),
                    rs[0][0],
                    rs[0][1],
                    rs[1][0],
                    rs[1][1],
                )
                outputfile.write("".join(["%s\t" % field for field in fields]) + "\n")
                # Flush per row so progress of a long run is visible on disk.
                outputfile.flush()


def generate_probesets(probesetfreezesfile, outputdir):
    """Generate probeset data files for a given probesetfreeze list.

    For: Ash
    Date: 2014-02-12

    For every non-blank line of probesetfreezesfile, the first
    whitespace-separated cell is used as a probeset freeze id.  One file
    "<outputdir>/<freeze id>_<freeze name>.txt" is written per freeze: a
    header line with the upper-cased strain names, then one tab-separated
    line per probeset holding its id, its name and one value per strain.
    Strains without a value for a probeset are written as 'x'.

    probesetfreezesfile -- path of the text file listing the freeze ids.
    outputdir           -- directory the output files are created in.
    """
    with open(probesetfreezesfile, 'r') as freezelist:
        for line in freezelist:
            cells = line.split()
            if not cells:
                # Blank or whitespace-only line: there is no id to look up.
                continue
            probesetfreeze = datastructure.get_probesetfreeze(cells[0])
            probesetfreezeid = probesetfreeze[0]
            probesetfreezename = probesetfreeze[1]
            inbredset = datastructure.get_inbredset(probesetfreezeid)
            inbredsetid = inbredset[0]
            strains = datastructure.get_strains(inbredsetid)
            #
            path = "%s/%d_%s.txt" % (outputdir, probesetfreezeid, probesetfreezename)
            with open(path, "w+") as outputfile:
                outputfile.write("%s\t" % "ProbeSet Id")
                outputfile.write("%s\t" % "ProbeSet Name")
                outputfile.write('\t'.join([strain[1].upper() for strain in strains]))
                outputfile.write("\n")
                outputfile.flush()
                #
                probesetxrefs = probesets.get_probesetxref(probesetfreezeid)
                print(probesetfreeze)
                print(len(probesetxrefs))
                # The lookup keys are the same for every probeset of this
                # freeze, so lower-case the strain names only once.
                strainnames = [strain[1].lower() for strain in strains]
                for probesetxref in probesetxrefs:
                    probesetid, probesetname, probesetdata = _load_probeset(probesetxref)
                    row = ["%s\t" % probesetid, "%s\t" % probesetname]
                    for strainname in strainnames:
                        if strainname in probesetdata:
                            value = probesetdata[strainname]
                        else:
                            value = 'x'
                        row.append("%s\t" % value)
                    row.append("\n")
                    outputfile.write("".join(row))
                    # Flush per row so progress of a long run is visible on disk.
                    outputfile.flush()


# Script driver: runs at module level (kept unguarded so that the module's
# import-time behaviour is unchanged).
probesetfreezesfile = "/home/leiyan/gn2/wqflask/maintenance/dataset/datadir/20140205_Ash_correlations/output2/probesetfreezes_filter.txt"
outputdir = "/home/leiyan/gn2/wqflask/maintenance/dataset/datadir/20140205_Ash_correlations/output2"
generate_probesets(probesetfreezesfile, outputdir)