From 839115abbdf1b7e90da1d4db7758a2f7f8f83037 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Tue, 4 Feb 2014 12:14:14 -0600 Subject: On branch master --- wqflask/maintenance/dataset/datastructure.py | 18 ++++++++++++------ wqflask/maintenance/dataset/phenotypes.py | 20 ++++---------------- wqflask/maintenance/dataset/probesets.py | 26 +++++++++++++++++++++++--- wqflask/maintenance/dataset/utilities.py | 11 ++++++++++- 4 files changed, 49 insertions(+), 26 deletions(-) (limited to 'wqflask/maintenance/dataset') diff --git a/wqflask/maintenance/dataset/datastructure.py b/wqflask/maintenance/dataset/datastructure.py index 73e1c0d8..f94009f5 100644 --- a/wqflask/maintenance/dataset/datastructure.py +++ b/wqflask/maintenance/dataset/datastructure.py @@ -1,11 +1,7 @@ -import sys -sys.path.append('.') -sys.path.append('..') - -from utilities import db +import utilities def get_probesetfreeze(inbredsetid): - cursor = db.get_cursor() + cursor = utilities.get_cursor() sql = """ SELECT ProbeSetFreeze.`Id`, ProbeSetFreeze.`Name`, ProbeSetFreeze.`FullName` FROM ProbeSetFreeze, ProbeFreeze @@ -14,4 +10,14 @@ def get_probesetfreeze(inbredsetid): """ cursor.execute(sql, (inbredsetid)) return cursor.fetchall() + +def get_probesetfreeze(probesetfreezeid): + cursor = utilities.get_cursor() + sql = """ + SELECT ProbeSetFreeze.`Id`, ProbeSetFreeze.`Name`, ProbeSetFreeze.`FullName` + FROM ProbeSetFreeze + WHERE ProbeSetFreeze.`Id`=%s + """ + cursor.execute(sql, (probesetfreezeid)) + return cursor.fetchone() \ No newline at end of file diff --git a/wqflask/maintenance/dataset/phenotypes.py b/wqflask/maintenance/dataset/phenotypes.py index c31ad0e3..1f58d9a8 100644 --- a/wqflask/maintenance/dataset/phenotypes.py +++ b/wqflask/maintenance/dataset/phenotypes.py @@ -1,10 +1,6 @@ # Author: Lei Yan # import -import sys -import os -import re -import MySQLdb import utilities @@ -48,10 +44,10 @@ def fetch(): print "get %d phenotypes" % (len(results)) for phenotyperow in results: publishxrefid = phenotyperow[0] - authors = clearspaces(phenotyperow[1]) - original_description = clearspaces(phenotyperow[2]) - pre_publication_description = clearspaces(phenotyperow[3]) - post_publication_description = clearspaces(phenotyperow[4]) + authors = utilities.clearspaces(phenotyperow[1]) + original_description = utilities.clearspaces(phenotyperow[2]) + pre_publication_description = utilities.clearspaces(phenotyperow[3]) + post_publication_description = utilities.clearspaces(phenotyperow[4]) phenotypesfile.write("%s\t%s\t%s\t%s\t%s\t" % (publishxrefid, authors, original_description, pre_publication_description, post_publication_description)) sql = """ SELECT Strain.Name, PublishData.value @@ -80,14 +76,6 @@ def fetch(): phenotypesfile.flush() # release phenotypesfile.close() - -def clearspaces(s): - if s: - s = re.sub('\s+', ' ', s) - s = s.strip() - return s - else: - return None # main if __name__ == "__main__": diff --git a/wqflask/maintenance/dataset/probesets.py b/wqflask/maintenance/dataset/probesets.py index 06b9a394..26d794a8 100644 --- a/wqflask/maintenance/dataset/probesets.py +++ b/wqflask/maintenance/dataset/probesets.py @@ -1,6 +1,7 @@ import sys import utilities +import datastructure import genotypes def get_probesetxref(probesetfreezeid): @@ -48,7 +49,7 @@ def get_probesetxref_probesetfreezeid(locus, probesetfreezeid): def get_probesetxref_inbredsetid(locus, inbredsetid): cursor = utilities.get_cursor() sql = """ - SELECT ProbeSetXRef.`ProbeSetId`, ProbeSetXRef.`mean`, ProbeSetXRef.`LRS`, ProbeSetXRef.`Locus` + SELECT ProbeSetXRef.`ProbeSetId`, ProbeSetXRef.`mean`, ProbeSetXRef.`LRS`, ProbeSetXRef.`Locus`, ProbeSetXRef.`ProbeSetFreezeId` FROM (ProbeSetXRef, ProbeSetFreeze, ProbeFreeze) WHERE ProbeSetXRef.`ProbeSetFreezeId`=ProbeSetFreeze.`Id` AND ProbeSetFreeze.`ProbeFreezeId`=ProbeFreeze.`Id` @@ -63,6 +64,13 @@ def get_normalized_probeset(locus, inbredsetid): probesetxrefs = get_probesetxref_inbredsetid(locus, inbredsetid) for probesetxref in probesetxrefs: normalized_probeset = [] + # + probesetfreezeid = probesetxref[4] + probesetfreeze = datastructure.get_probesetfreeze(probesetfreezeid) + normalized_probeset.append(probesetfreeze[0]) + normalized_probeset.append(probesetfreeze[1]) + normalized_probeset.append(probesetfreeze[2]) + # probesetid = probesetxref[0] probeset = get_probeset(probesetid) normalized_probeset.append(probeset[1]) @@ -71,13 +79,25 @@ def get_normalized_probeset(locus, inbredsetid): normalized_probeset.append(probeset[4]) normalized_probeset.append(probeset[5]) normalized_probeset.append(probeset[6]) + # normalized_probeset.append(probesetxref[1]) normalized_probeset.append(probesetxref[2]) + # locus = probesetxref[3] geno = genotypes.get_geno(inbredsetid=inbredsetid, name=locus) normalized_probeset.append(geno[2]) normalized_probeset.append(geno[3]) + # normalized_probesets.append(normalized_probeset) - print normalized_probesets[:2] -get_normalized_probeset(locus="rs3663871", inbredsetid=1) +locus="rs3663871" +inbredsetid=1 + +results = get_normalized_probeset(locus=locus, inbredsetid=inbredsetid) +file = open('probesets_%s.txt' % (locus), 'w+') +file.write("GN dataset ID\t\n") +file.flush() +for row in results: + file.write(row[0]) + file.flush() +file.close() diff --git a/wqflask/maintenance/dataset/utilities.py b/wqflask/maintenance/dataset/utilities.py index 453ee707..5ffa9047 100644 --- a/wqflask/maintenance/dataset/utilities.py +++ b/wqflask/maintenance/dataset/utilities.py @@ -1,4 +1,5 @@ import MySQLdb +import re def get_cursor(): host = 'localhost' @@ -7,4 +8,12 @@ def get_cursor(): db = 'db_webqtl' con = MySQLdb.Connect(db=db, host=host, user=user, passwd=passwd) cursor = con.cursor() - return cursor \ No newline at end of file + return cursor + +def clearspaces(s): + if s: + s = re.sub('\s+', ' ', s) + s = s.strip() + return s + else: + return None -- cgit v1.2.3