about | summary | refs | log | tree | commit | diff
path: root/wqflask/base
diff options
context:
space:
mode:
author Lei Yan 2017-02-02 19:08:31 +0000
committer Lei Yan 2017-02-02 19:08:31 +0000
commit 568adbb1bfe72c22fae9bec7da7a421945674743 (patch)
tree 1e0c445912af7ccb273a25b858889048eb96f756 /wqflask/base
parent 1cae8e42f0f73718549b369cf0cd4d9f320e230e (diff)
parent e63c4014e7bc34b440707be19af3779b72102fdb (diff)
download genenetwork2-568adbb1bfe72c22fae9bec7da7a421945674743.tar.gz
Merge /home/zas1024/gene
Diffstat (limited to 'wqflask/base')
-rw-r--r-- wqflask/base/data_set.py 39
-rw-r--r-- wqflask/base/trait.py 942
-rw-r--r-- wqflask/base/webqtlCaseData.py 10
3 files changed, 480 insertions, 511 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ae7fdcb5..41c5d8ba 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -44,7 +44,7 @@ from db import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
from utility import chunks
-from utility.tools import locate, locate_ignore_error
+from utility.tools import locate, locate_ignore_error, flat_files
from maintenance import get_group_samplelists
@@ -53,7 +53,7 @@ from pprint import pformat as pf
from db.gn_server import menu_main
from db.call import fetchall,fetchone,fetch1
-from utility.tools import USE_GN_SERVER, USE_REDIS
+from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists
from utility.logger import getLogger
logger = getLogger(__name__ )
@@ -226,7 +226,7 @@ class Markers(object):
class HumanMarkers(Markers):
def __init__(self, name, specified_markers = []):
- marker_data_fh = open(locate('genotype') + '/' + name + '.bim')
+ marker_data_fh = open(flat_files('mapping') + '/' + name + '.bim')
self.markers = []
for line in marker_data_fh:
splat = line.strip().split()
@@ -271,7 +271,8 @@ class DatasetGroup(object):
self.f1list = None
self.parlist = None
self.get_f1_parent_strains()
- #logger.debug("parents/f1s: {}:{}".format(self.parlist, self.f1list))
+
+ self.accession_id = self.get_accession_id()
self.species = webqtlDatabaseFunction.retrieve_species(self.name)
@@ -280,15 +281,39 @@ class DatasetGroup(object):
self._datasets = None
self.genofile = None
+ def get_accession_id(self):
+ results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
+ InbredSet.Name = %s and
+ PublishFreeze.InbredSetId = InbredSet.Id and
+ InfoFiles.InfoPageName = PublishFreeze.Name and
+ PublishFreeze.public > 0 and
+ PublishFreeze.confidentiality < 1 order by
+ PublishFreeze.CreateTime desc""", (self.name)).fetchone()
+
+ if results != None:
+ return str(results[0])
+ else:
+ return "None"
+
def get_specified_markers(self, markers = []):
self.markers = HumanMarkers(self.name, markers)
def get_markers(self):
- #logger.debug("self.species is:", self.species)
- if self.species == "human":
+ logger.debug("self.species is:", self.species)
+
+ def check_plink_gemma():
+ if flat_file_exists("mapping"):
+ MAPPING_PATH = flat_files("mapping")+"/"
+ if (os.path.isfile(MAPPING_PATH+self.name+".bed") and
+ (os.path.isfile(MAPPING_PATH+self.name+".map") or
+ os.path.isfile(MAPPING_PATH+self.name+".bim"))):
+ return True
+ return False
+
+ if check_plink_gemma():
marker_class = HumanMarkers
else:
- marker_class = Markers
+ marker_class = Markers
self.markers = marker_class(self.name)
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 276c624a..bf87e879 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -23,6 +23,8 @@ from flask import Flask, g, request
from utility.logger import getLogger
logger = getLogger(__name__ )
+from wqflask import user_manager
+
def print_mem(stage=""):
mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
print("{}: {}".format(stage, mem/1024))
@@ -55,6 +57,7 @@ class GeneralTrait(object):
self.lrs = None
self.pvalue = None
self.mean = None
+ self.additive = None
self.num_overlap = None
self.strand_probe = None
self.symbol = None
@@ -69,45 +72,9 @@ class GeneralTrait(object):
# Todo: These two lines are necessary most of the time, but perhaps not all of the time
# So we could add a simple if statement to short-circuit this if necessary
- self.retrieve_info(get_qtl_info=get_qtl_info)
+ self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info)
if get_sample_info != False:
- self.retrieve_sample_data()
-
-
- def jsonable(self):
- """Return a dict suitable for using as json
-
- Actual turning into json doesn't happen here though"""
-
- if self.dataset.type == "ProbeSet":
- return dict(name=self.name,
- symbol=self.symbol,
- dataset=self.dataset.name,
- description=self.description_display,
- mean=self.mean,
- location=self.location_repr,
- lrs_score=self.LRS_score_repr,
- lrs_location=self.LRS_location_repr,
- additive=self.additive
- )
- elif self.dataset.type == "Publish":
- return dict(name=self.name,
- dataset=self.dataset.name,
- description=self.description_display,
- authors=self.authors,
- pubmed_text=self.pubmed_text,
- pubmed_link=self.pubmed_link,
- lrs_score=self.LRS_score_repr,
- lrs_location=self.LRS_location_repr,
- additive=self.additive
- )
- elif self.dataset.type == "Geno":
- return dict(name=self.name,
- dataset=self.dataset.name,
- location=self.location_repr
- )
- else:
- return dict()
+ self = retrieve_sample_data(self, self.dataset)
def get_name(self):
@@ -213,410 +180,6 @@ class GeneralTrait(object):
return samples, vals, the_vars, sample_aliases
- #
- # In ProbeSet, there are maybe several annotations match one sequence
- # so we need use sequence(BlatSeq) as the identification, when we update
- # one annotation, we update the others who match the sequence also.
- #
- # Hongqiang Li, 3/3/2008
- #
- #def getSequence(self):
- # assert self.cursor
- # if self.dataset.type == 'ProbeSet':
- # self.cursor.execute('''
- # SELECT
- # ProbeSet.BlatSeq
- # FROM
- # ProbeSet, ProbeSetFreeze, ProbeSetXRef
- # WHERE
- # ProbeSet.Id=ProbeSetXRef.ProbeSetId and
- # ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
- # ProbeSet.Name = %s
- # ProbeSetFreeze.Name = %s
- # ''', self.name, self.dataset.name)
- # #self.cursor.execute(query)
- # results = self.fetchone()
- #
- # return results[0]
-
-
-
- def retrieve_sample_data(self, samplelist=None):
- if samplelist == None:
- samplelist = []
-
- results = self.dataset.retrieve_sample_data(self.name)
-
- # Todo: is this necessary? If not remove
- self.data.clear()
-
- all_samples_ordered = self.dataset.group.all_samples_ordered()
-
- if results:
- for item in results:
- name, value, variance, num_cases, name2 = item
- if not samplelist or (samplelist and name in samplelist):
- self.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
-
- def retrieve_info(self, get_qtl_info=False):
- assert self.dataset, "Dataset doesn't exist"
- if self.dataset.type == 'Publish':
- query = """
- SELECT
- PublishXRef.Id, Publication.PubMed_ID,
- Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
- Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
- Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
- Publication.Authors, Publication.Title, Publication.Abstract,
- Publication.Journal, Publication.Volume, Publication.Pages,
- Publication.Month, Publication.Year, PublishXRef.Sequence,
- Phenotype.Units, PublishXRef.comments
- FROM
- PublishXRef, Publication, Phenotype, PublishFreeze
- WHERE
- PublishXRef.Id = %s AND
- Phenotype.Id = PublishXRef.PhenotypeId AND
- Publication.Id = PublishXRef.PublicationId AND
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishFreeze.Id = %s
- """ % (self.name, self.dataset.id)
-
- logger.sql(query)
- trait_info = g.db.execute(query).fetchone()
-
-
- #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
- #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
- elif self.dataset.type == 'ProbeSet':
- display_fields_string = ', ProbeSet.'.join(self.dataset.display_fields)
- display_fields_string = 'ProbeSet.' + display_fields_string
- query = """
- SELECT %s
- FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
- WHERE
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetFreeze.Name = '%s' AND
- ProbeSet.Name = '%s'
- """ % (escape(display_fields_string),
- escape(self.dataset.name),
- escape(str(self.name)))
- logger.sql(query)
- trait_info = g.db.execute(query).fetchone()
- #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
- # to avoid the problem of same marker name from different species.
- elif self.dataset.type == 'Geno':
- display_fields_string = string.join(self.dataset.display_fields,',Geno.')
- display_fields_string = 'Geno.' + display_fields_string
- query = """
- SELECT %s
- FROM Geno, GenoFreeze, GenoXRef
- WHERE
- GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoXRef.GenoId = Geno.Id AND
- GenoFreeze.Name = '%s' AND
- Geno.Name = '%s'
- """ % (escape(display_fields_string),
- escape(self.dataset.name),
- escape(self.name))
- logger.sql(query)
- trait_info = g.db.execute(query).fetchone()
- else: #Temp type
- query = """SELECT %s FROM %s WHERE Name = %s"""
- logger.sql(query)
- trait_info = g.db.execute(query,
- (string.join(self.dataset.display_fields,','),
- self.dataset.type, self.name)).fetchone()
- if trait_info:
- self.haveinfo = True
-
- #XZ: assign SQL query result to trait attributes.
- for i, field in enumerate(self.dataset.display_fields):
- holder = trait_info[i]
- if isinstance(trait_info[i], basestring):
- holder = unicode(trait_info[i], "utf8", "ignore")
- setattr(self, field, holder)
-
- if self.dataset.type == 'Publish':
- self.confidential = 0
- if self.pre_publication_description and not self.pubmed_id:
- self.confidential = 1
-
- description = self.post_publication_description
-
- #If the dataset is confidential and the user has access to confidential
- #phenotype traits, then display the pre-publication description instead
- #of the post-publication description
- if self.confidential:
- self.description_display = self.pre_publication_description
-
- #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
- # privilege=self.dataset.privilege,
- # userName=self.dataset.userName,
- # authorized_users=self.authorized_users):
- #
- # description = self.pre_publication_description
- else:
- if description:
- self.description_display = description.strip()
- else:
- self.description_display = ""
-
- if not self.year.isdigit():
- self.pubmed_text = "N/A"
- else:
- self.pubmed_text = self.year
-
- if self.pubmed_id:
- self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id
-
-
- self.homologeneid = None
- if self.dataset.type == 'ProbeSet' and self.dataset.group:
- if self.geneid:
- #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
- #XZ: So I have to test if geneid is number before execute the query.
- #XZ: The geneid values in database should be cleaned up.
- #try:
- # float(self.geneid)
- # geneidIsNumber = True
- #except ValueError:
- # geneidIsNumber = False
- #if geneidIsNumber:
- query = """
- SELECT
- HomologeneId
- FROM
- Homologene, Species, InbredSet
- WHERE
- Homologene.GeneId ='%s' AND
- InbredSet.Name = '%s' AND
- InbredSet.SpeciesId = Species.Id AND
- Species.TaxonomyId = Homologene.TaxonomyId
- """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
- logger.sql(query)
- result = g.db.execute(query).fetchone()
- #else:
- # result = None
-
- if result:
- self.homologeneid = result[0]
-
- description_string = unicode(str(self.description).strip(codecs.BOM_UTF8), 'utf-8')
- target_string = unicode(str(self.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
-
- if len(description_string) > 1 and description_string != 'None':
- description_display = description_string
- else:
- description_display = self.symbol
-
- if (len(description_display) > 1 and description_display != 'N/A' and
- len(target_string) > 1 and target_string != 'None'):
- description_display = description_display + '; ' + target_string.strip()
-
- # Save it for the jinja2 template
- self.description_display = description_display
-
- #XZ: trait_location_value is used for sorting
- trait_location_repr = 'N/A'
- trait_location_value = 1000000
-
- if self.chr and self.mb:
- #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
- #This is so we can convert the location to a number used for sorting
- trait_location_value = convert_location_to_value(self.chr, self.mb)
- #try:
- # trait_location_value = int(self.chr)*1000 + self.mb
- #except ValueError:
- # if self.chr.upper() == 'X':
- # trait_location_value = 20*1000 + self.mb
- # else:
- # trait_location_value = (ord(str(self.chr).upper()[0])*1000 +
- # self.mb)
-
- #ZS: Put this in function currently called "convert_location_to_value"
- self.location_repr = 'Chr%s: %.6f' % (self.chr, float(self.mb))
- self.location_value = trait_location_value
-
-
- if get_qtl_info:
- #LRS and its location
- self.LRS_score_repr = "N/A"
- self.LRS_score_value = 0
- self.LRS_location_repr = "N/A"
- self.LRS_location_value = 1000000
- if self.dataset.type == 'ProbeSet' and not self.cellid:
- query = """
- SELECT
- ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
- FROM
- ProbeSetXRef, ProbeSet
- WHERE
- ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSet.Name = "{}" AND
- ProbeSetXRef.ProbeSetFreezeId ={}
- """.format(self.name, self.dataset.id)
- logger.sql(query)
- trait_qtl = g.db.execute(query).fetchone()
- if trait_qtl:
- self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl
- if self.locus:
- query = """
- select Geno.Chr, Geno.Mb from Geno, Species
- where Species.Name = '{}' and
- Geno.Name = '{}' and
- Geno.SpeciesId = Species.Id
- """.format(self.dataset.group.species, self.locus)
- logger.sql(query)
- result = g.db.execute(query).fetchone()
- if result:
- self.locus_chr = result[0]
- self.locus_mb = result[1]
- else:
- self.locus = self.locus_chr = self.locus_mb = self.additive = ""
- else:
- self.locus = self.locus_chr = self.locus_mb = self.additive = ""
- else:
- self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = self.additive = ""
-
-
- if self.dataset.type == 'Publish':
- query = """
- SELECT
- PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
- FROM
- PublishXRef, PublishFreeze
- WHERE
- PublishXRef.Id = %s AND
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishFreeze.Id =%s
- """ % (self.name, self.dataset.id)
- logger.sql(query)
- trait_qtl = g.db.execute(query).fetchone()
- if trait_qtl:
- self.locus, self.lrs, self.additive = trait_qtl
- if self.locus:
- query = """
- select Geno.Chr, Geno.Mb from Geno, Species
- where Species.Name = '{}' and
- Geno.Name = '{}' and
- Geno.SpeciesId = Species.Id
- """.format(self.dataset.group.species, self.locus)
- logger.sql(query)
- result = g.db.execute(query).fetchone()
- if result:
- self.locus_chr = result[0]
- self.locus_mb = result[1]
- else:
- self.locus = self.locus_chr = self.locus_mb = self.additive = ""
- else:
- self.locus = self.locus_chr = self.locus_mb = self.additive = ""
- else:
- self.locus = self.lrs = self.additive = ""
-
- if (self.dataset.type == 'Publish' or self.dataset.type == "ProbeSet") and self.locus_chr != "" and self.locus_mb != "":
- #XZ: LRS_location_value is used for sorting
- try:
- LRS_location_value = int(self.locus_chr)*1000 + float(self.locus_mb)
- except:
- if self.locus_chr.upper() == 'X':
- LRS_location_value = 20*1000 + float(self.locus_mb)
- else:
- LRS_location_value = ord(str(self.locus_chr).upper()[0])*1000 + float(self.locus_mb)
-
- self.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (self.locus_chr, float(self.locus_mb))
- if self.lrs != "":
- self.LRS_score_repr = LRS_score_repr = '%3.1f' % self.lrs
- self.LRS_score_value = LRS_score_value = self.lrs
- else:
- raise KeyError, `self.name`+' information is not found in the database.'
-
- def genHTML(self, formName = "", dispFromDatabase=0, privilege="guest", userName="Guest", authorized_users=""):
- if not self.haveinfo:
- self.retrieveInfo()
-
- if self.dataset.type == 'Publish':
- PubMedLink = ""
- if self.pubmed_id:
- PubMedLink = HT.Href(text="PubMed %d : " % self.pubmed_id,
- target = "_blank", url = webqtlConfig.PUBMEDLINK_URL % self.pubmed_id)
- else:
- PubMedLink = HT.Span("Unpublished : ", Class="fs15")
-
- if formName:
- setDescription2 = HT.Href(url="javascript:showDatabase3('%s','%s','%s','')" %
- (formName, self.dataset.name, self.name), Class = "fs14")
- else:
- setDescription2 = HT.Href(url="javascript:showDatabase2('%s','%s','')" %
- (self.dataset.name,self.name), Class = "fs14")
-
- if self.confidential and not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=privilege, userName=userName, authorized_users=authorized_users):
- setDescription2.append('RecordID/%s - %s' % (self.name, self.pre_publication_description))
- else:
- setDescription2.append('RecordID/%s - %s' % (self.name, self.post_publication_description))
-
- #XZ 03/26/2011: Xiaodong comment out the following two lins as Rob asked. Need to check with Rob why in PublishXRef table, there are few row whose Sequence > 1.
- #if self.sequence > 1:
- # setDescription2.append(' btach %d' % self.sequence)
- if self.authors:
- a1 = string.split(self.authors,',')[0]
- while a1[0] == '"' or a1[0] == "'" :
- a1 = a1[1:]
- setDescription2.append(' by ')
- setDescription2.append(HT.Italic('%s, and colleagues' % a1))
- setDescription = HT.Span(PubMedLink, setDescription2)
-
- elif self.dataset.type == 'Temp':
- setDescription = HT.Href(text="%s" % (self.description),url="javascript:showDatabase2\
- ('%s','%s','')" % (self.dataset.name,self.name), Class = "fs14")
- setDescription = HT.Span(setDescription)
-
- elif self.dataset.type == 'Geno': # Genome DB only available for single search
- if formName:
- setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
- '%2.3f' % self.mb),url="javascript:showDatabase3('%s','%s','%s','')" % \
- (formName, self.dataset.name, self.name), Class = "fs14")
- else:
- setDescription = HT.Href(text="Locus %s [Chr %s @ %s Mb]" % (self.name,self.chr,\
- '%2.3f' % self.mb),url="javascript:showDatabase2('%s','%s','')" % \
- (self.dataset.name,self.name), Class = "fs14")
-
- setDescription = HT.Span(setDescription)
-
- else:
- if self.cellid:
- if formName:
- setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name, self.cellid),url=\
- "javascript:showDatabase3('%s','%s','%s','%s')" % (formName, self.dataset.name,self.name,self.cellid), \
- Class = "fs14")
- else:
- setDescription = HT.Href(text="ProbeSet/%s/%s" % (self.name,self.cellid),url=\
- "javascript:showDatabase2('%s','%s','%s')" % (self.dataset.name,self.name,self.cellid), \
- Class = "fs14")
- else:
- if formName:
- setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
- "javascript:showDatabase3('%s','%s','%s','')" % (formName, self.dataset.name,self.name), \
- Class = "fs14")
- else:
- setDescription = HT.Href(text="ProbeSet/%s" % self.name, url=\
- "javascript:showDatabase2('%s','%s','')" % (self.dataset.name,self.name), \
- Class = "fs14")
- if self.symbol and self.chr and self.mb:
- setDescription.append(' [')
- setDescription.append(HT.Italic('%s' % self.symbol,Class="cdg fwb"))
- setDescription.append(' on Chr %s @ %s Mb]' % (self.chr,self.mb))
- if self.description:
- setDescription.append(': %s' % self.description)
- if self.probe_target_description:
- setDescription.append('; %s' % self.probe_target_description)
- setDescription = HT.Span(setDescription)
-
- if self.dataset.type != 'Temp' and dispFromDatabase:
- setDescription.append( ' --- FROM : ')
- setDescription.append(self.dataset.genHTML(Class='cori'))
- return setDescription
-
@property
def name_header_fmt(self):
'''Return a human-readable name for use in page header'''
@@ -644,7 +207,7 @@ class GeneralTrait(object):
formatted = self.post_publication_description
else:
formatted = "Not available"
- return formatted.capitalize()
+ return formatted
@property
def alias_fmt(self):
@@ -682,62 +245,48 @@ class GeneralTrait(object):
return fmt
-
- def get_database(self):
- """
- Returns the database, and the url referring to the database if it exists
-
- We're going to to return two values here, and we don't want to have to call this twice from
- the template. So it's not a property called from the template, but instead is called from the view
-
- """
- if self.cellid:
- query = """ select ProbeFreeze.Name from ProbeFreeze, ProbeSetFreeze where
- ProbeFreeze.Id =
- ProbeSetFreeze.ProbeFreezeId AND
- ProbeSetFreeze.Id = %d""" % thisTrait.dataset.id
- logger.sql(query)
- probeDBName = g.db.execute(query).fetchone()[0]
- return dict(name = probeDBName,
- url = None)
- else:
- return dict(name = self.dataset.fullname,
- url = webqtlConfig.INFOPAGEHREF % self.dataset.name)
-
- def calculate_correlation(self, values, method):
- """Calculate the correlation value and p value according to the method specified"""
-
- #ZS: This takes the list of values of the trait our selected trait is being correlated against and removes the values of the samples our trait has no value for
- #There's probably a better way of dealing with this, but I'll have to ask Christian
- updated_raw_values = []
- updated_values = []
- for i in range(len(values)):
- if values[i] != "None":
- updated_raw_values.append(self.raw_values[i])
- updated_values.append(values[i])
-
- self.raw_values = updated_raw_values
- values = updated_values
-
- if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON:
- corr, nOverlap = webqtlUtil.calCorrelation(self.raw_values, values, len(values))
- else:
- corr, nOverlap = webqtlUtil.calCorrelationRank(self.raw_values, values, len(values))
-
- self.correlation = corr
- self.overlap = nOverlap
-
- if self.overlap < 3:
- self.p_value = 1.0
- else:
- #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to br prevented.
- if abs(self.correlation) >= 1.0:
- self.p_value = 0.0
- else:
- ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
- ZValue = ZValue*sqrt(self.overlap-3)
- self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))
-
+# In ProbeSet, there are maybe several annotations match one sequence
+# so we need use sequence(BlatSeq) as the identification, when we update
+# one annotation, we update the others who match the sequence also.
+#
+# Hongqiang Li, 3/3/2008
+def getSequence(trait, dataset_name):
+ dataset = create_dataset(dataset_name)
+
+ if dataset.type == 'ProbeSet':
+ results = g.db.execute('''
+ SELECT
+ ProbeSet.BlatSeq
+ FROM
+ ProbeSet, ProbeSetFreeze, ProbeSetXRef
+ WHERE
+ ProbeSet.Id=ProbeSetXRef.ProbeSetId and
+ ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
+ ProbeSet.Name = %s
+ ProbeSetFreeze.Name = %s
+ ''', trait.name, dataset.name).fetchone()
+
+ return results[0]
+
+def retrieve_sample_data(trait, dataset, samplelist=None):
+ if samplelist == None:
+ samplelist = []
+
+ results = dataset.retrieve_sample_data(trait.name)
+
+ # Todo: is this necessary? If not remove
+ trait.data.clear()
+
+ all_samples_ordered = dataset.group.all_samples_ordered()
+
+ if results:
+ for item in results:
+ name, value, variance, num_cases, name2 = item
+ if not samplelist or (samplelist and name in samplelist):
+ trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
+
+ return trait
+
def convert_location_to_value(chromosome, mb):
try:
location_value = int(chromosome)*1000 + float(mb)
@@ -765,3 +314,398 @@ def get_sample_data():
# jsonable_sample_data[sample] = trait_ob.data[sample].value
#
#return jsonable_sample_data
+
+def jsonable(trait, dataset_name):
+ """Return a dict suitable for using as json
+
+ Actual turning into json doesn't happen here though"""
+
+ dataset = create_dataset(dataset_name)
+
+ if dataset.type == "ProbeSet":
+ return dict(name=trait.name,
+ symbol=trait.symbol,
+ dataset=dataset.name,
+ description=trait.description_display,
+ mean=trait.mean,
+ location=trait.location_repr,
+ lrs_score=trait.LRS_score_repr,
+ lrs_location=trait.LRS_location_repr,
+ additive=trait.additive
+ )
+ elif dataset.type == "Publish":
+ if trait.pubmed_id:
+ return dict(name=trait.name,
+ dataset=dataset.name,
+ description=trait.description_display,
+ authors=trait.authors,
+ pubmed_text=trait.pubmed_text,
+ pubmed_link=trait.pubmed_link,
+ lrs_score=trait.LRS_score_repr,
+ lrs_location=trait.LRS_location_repr,
+ additive=trait.additive
+ )
+ else:
+ return dict(name=trait.name,
+ dataset=dataset.name,
+ description=trait.description_display,
+ authors=trait.authors,
+ pubmed_text=trait.pubmed_text,
+ lrs_score=trait.LRS_score_repr,
+ lrs_location=trait.LRS_location_repr,
+ additive=trait.additive
+ )
+ elif dataset.type == "Geno":
+ return dict(name=trait.name,
+ dataset=dataset.name,
+ location=trait.location_repr
+ )
+ else:
+ return dict()
+
+def jsonable_table_row(trait, dataset_name, index):
+ """Return a list suitable for json and intended to be displayed in a table
+
+ Actual turning into json doesn't happen here though"""
+
+ dataset = create_dataset(dataset_name)
+
+ if dataset.type == "ProbeSet":
+ if trait.mean == "":
+ mean = "N/A"
+ else:
+ mean = "%.3f" % round(float(trait.mean), 2)
+ if trait.additive == "":
+ additive = "N/A"
+ else:
+ additive = "%.3f" % round(float(trait.additive), 2)
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ index,
+ '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+ trait.symbol,
+ trait.description_display,
+ trait.location_repr,
+ mean,
+ trait.LRS_score_repr,
+ trait.LRS_location_repr,
+ additive]
+ elif dataset.type == "Publish":
+ if trait.additive == "":
+ additive = "N/A"
+ else:
+ additive = "%.2f" % round(float(trait.additive), 2)
+ if trait.pubmed_id:
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ index,
+ '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+ trait.description_display,
+ trait.authors,
+ '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>',
+ trait.LRS_score_repr,
+ trait.LRS_location_repr,
+ additive]
+ else:
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ index,
+ '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+ trait.description_display,
+ trait.authors,
+ trait.pubmed_text,
+ trait.LRS_score_repr,
+ trait.LRS_location_repr,
+ additive]
+ elif dataset.type == "Geno":
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ index,
+ '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
+ trait.location_repr]
+ else:
+ return dict()
+
+def retrieve_trait_info(trait, dataset, get_qtl_info=False):
+ assert dataset, "Dataset doesn't exist"
+
+ if dataset.type == 'Publish':
+ query = """
+ SELECT
+ PublishXRef.Id, Publication.PubMed_ID,
+ Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
+ Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
+ Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
+ Publication.Authors, Publication.Title, Publication.Abstract,
+ Publication.Journal, Publication.Volume, Publication.Pages,
+ Publication.Month, Publication.Year, PublishXRef.Sequence,
+ Phenotype.Units, PublishXRef.comments
+ FROM
+ PublishXRef, Publication, Phenotype, PublishFreeze
+ WHERE
+ PublishXRef.Id = %s AND
+ Phenotype.Id = PublishXRef.PhenotypeId AND
+ Publication.Id = PublishXRef.PublicationId AND
+ PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+ PublishFreeze.Id = %s
+ """ % (trait.name, dataset.id)
+
+ logger.sql(query)
+ trait_info = g.db.execute(query).fetchone()
+
+
+ #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
+ #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
+ elif dataset.type == 'ProbeSet':
+ display_fields_string = ', ProbeSet.'.join(dataset.display_fields)
+ display_fields_string = 'ProbeSet.' + display_fields_string
+ query = """
+ SELECT %s
+ FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetFreeze.Name = '%s' AND
+ ProbeSet.Name = '%s'
+ """ % (escape(display_fields_string),
+ escape(dataset.name),
+ escape(str(trait.name)))
+ logger.sql(query)
+ trait_info = g.db.execute(query).fetchone()
+ #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
+ # to avoid the problem of same marker name from different species.
+ elif dataset.type == 'Geno':
+ display_fields_string = string.join(dataset.display_fields,',Geno.')
+ display_fields_string = 'Geno.' + display_fields_string
+ query = """
+ SELECT %s
+ FROM Geno, GenoFreeze, GenoXRef
+ WHERE
+ GenoXRef.GenoFreezeId = GenoFreeze.Id AND
+ GenoXRef.GenoId = Geno.Id AND
+ GenoFreeze.Name = '%s' AND
+ Geno.Name = '%s'
+ """ % (escape(display_fields_string),
+ escape(dataset.name),
+ escape(trait.name))
+ logger.sql(query)
+ trait_info = g.db.execute(query).fetchone()
+ else: #Temp type
+ query = """SELECT %s FROM %s WHERE Name = %s"""
+ logger.sql(query)
+ trait_info = g.db.execute(query,
+ (string.join(dataset.display_fields,','),
+ dataset.type, trait.name)).fetchone()
+ if trait_info:
+ trait.haveinfo = True
+
+ #XZ: assign SQL query result to trait attributes.
+ for i, field in enumerate(dataset.display_fields):
+ holder = trait_info[i]
+ if isinstance(trait_info[i], basestring):
+ holder = unicode(trait_info[i], "utf-8", "ignore")
+ setattr(trait, field, holder)
+
+ if dataset.type == 'Publish':
+ trait.confidential = 0
+ if trait.pre_publication_description and not trait.pubmed_id:
+ trait.confidential = 1
+
+ description = trait.post_publication_description
+
+ #If the dataset is confidential and the user has access to confidential
+ #phenotype traits, then display the pre-publication description instead
+ #of the post-publication description
+ if trait.confidential:
+ trait.description_display = trait.pre_publication_description
+
+ #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
+ # privilege=self.dataset.privilege,
+ # userName=self.dataset.userName,
+ # authorized_users=self.authorized_users):
+ #
+ # description = self.pre_publication_description
+ else:
+ if description:
+ trait.description_display = description.strip()
+ else:
+ trait.description_display = ""
+
+ if not trait.year.isdigit():
+ trait.pubmed_text = "N/A"
+ else:
+ trait.pubmed_text = trait.year
+
+ if trait.pubmed_id:
+ trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id
+
+
+ trait.homologeneid = None
+ if dataset.type == 'ProbeSet' and dataset.group:
+ if trait.geneid:
+ #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
+ #XZ: So I have to test if geneid is number before execute the query.
+ #XZ: The geneid values in database should be cleaned up.
+ #try:
+ # float(self.geneid)
+ # geneidIsNumber = True
+ #except ValueError:
+ # geneidIsNumber = False
+ #if geneidIsNumber:
+ query = """
+ SELECT
+ HomologeneId
+ FROM
+ Homologene, Species, InbredSet
+ WHERE
+ Homologene.GeneId ='%s' AND
+ InbredSet.Name = '%s' AND
+ InbredSet.SpeciesId = Species.Id AND
+ Species.TaxonomyId = Homologene.TaxonomyId
+ """ % (escape(str(trait.geneid)), escape(dataset.group.name))
+ logger.sql(query)
+ result = g.db.execute(query).fetchone()
+ #else:
+ # result = None
+
+ if result:
+ trait.homologeneid = result[0]
+
+ description_string = unicode(str(trait.description).strip(codecs.BOM_UTF8), 'utf-8')
+ target_string = unicode(str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
+
+ if len(description_string) > 1 and description_string != 'None':
+ description_display = description_string
+ else:
+ description_display = trait.symbol
+
+ if (len(description_display) > 1 and description_display != 'N/A' and
+ len(target_string) > 1 and target_string != 'None'):
+ description_display = description_display + '; ' + target_string.strip()
+
+ # Save it for the jinja2 template
+ trait.description_display = description_display
+
+ #XZ: trait_location_value is used for sorting
+ trait.location_repr = 'N/A'
+ trait.location_value = 1000000
+
+ if trait.chr and trait.mb:
+ #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+ #This is so we can convert the location to a number used for sorting
+ trait_location_value = convert_location_to_value(trait.chr, trait.mb)
+ #try:
+ # trait_location_value = int(self.chr)*1000 + self.mb
+ #except ValueError:
+ # if self.chr.upper() == 'X':
+ # trait_location_value = 20*1000 + self.mb
+ # else:
+ # trait_location_value = (ord(str(self.chr).upper()[0])*1000 +
+ # self.mb)
+
+ #ZS: Put this in function currently called "convert_location_to_value"
+ trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb))
+ trait.location_value = trait_location_value
+
+ elif dataset.type == "Geno":
+ trait.location_repr = 'N/A'
+ trait.location_value = 1000000
+
+ if trait.chr and trait.mb:
+ #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+ #This is so we can convert the location to a number used for sorting
+ trait_location_value = convert_location_to_value(trait.chr, trait.mb)
+
+ #ZS: Put this in function currently called "convert_location_to_value"
+ trait.location_repr = 'Chr%s: %.6f' % (trait.chr, float(trait.mb))
+ trait.location_value = trait_location_value
+
+ if get_qtl_info:
+ #LRS and its location
+ trait.LRS_score_repr = "N/A"
+ trait.LRS_score_value = 0
+ trait.LRS_location_repr = "N/A"
+ trait.LRS_location_value = 1000000
+ if dataset.type == 'ProbeSet' and not trait.cellid:
+ query = """
+ SELECT
+ ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
+ FROM
+ ProbeSetXRef, ProbeSet
+ WHERE
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSet.Name = "{}" AND
+ ProbeSetXRef.ProbeSetFreezeId ={}
+ """.format(trait.name, dataset.id)
+ logger.sql(query)
+ trait_qtl = g.db.execute(query).fetchone()
+ if trait_qtl:
+ trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl
+ if trait.locus:
+ query = """
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = '{}' and
+ Geno.Name = '{}' and
+ Geno.SpeciesId = Species.Id
+ """.format(dataset.group.species, trait.locus)
+ logger.sql(query)
+ result = g.db.execute(query).fetchone()
+ if result:
+ trait.locus_chr = result[0]
+ trait.locus_mb = result[1]
+ else:
+ trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+ else:
+ trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+ else:
+ trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.mean = trait.additive = ""
+
+
+ if dataset.type == 'Publish':
+ query = """
+ SELECT
+ PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive
+ FROM
+ PublishXRef, PublishFreeze
+ WHERE
+ PublishXRef.Id = %s AND
+ PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+ PublishFreeze.Id =%s
+ """ % (trait.name, dataset.id)
+ logger.sql(query)
+ trait_qtl = g.db.execute(query).fetchone()
+ if trait_qtl:
+ trait.locus, trait.lrs, trait.additive = trait_qtl
+ if trait.locus:
+ query = """
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = '{}' and
+ Geno.Name = '{}' and
+ Geno.SpeciesId = Species.Id
+ """.format(dataset.group.species, trait.locus)
+ logger.sql(query)
+ result = g.db.execute(query).fetchone()
+ if result:
+ trait.locus_chr = result[0]
+ trait.locus_mb = result[1]
+ else:
+ trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+ else:
+ trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
+ else:
+ trait.locus = trait.lrs = trait.additive = ""
+
+ if (dataset.type == 'Publish' or dataset.type == "ProbeSet") and trait.locus_chr != "" and trait.locus_mb != "":
+ #XZ: LRS_location_value is used for sorting
+ try:
+ LRS_location_value = int(trait.locus_chr)*1000 + float(trait.locus_mb)
+ except:
+ if trait.locus_chr.upper() == 'X':
+ LRS_location_value = 20*1000 + float(trait.locus_mb)
+ else:
+ LRS_location_value = ord(str(trait.locus_chr).upper()[0])*1000 + float(trait.locus_mb)
+
+ trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (trait.locus_chr, float(trait.locus_mb))
+ if trait.lrs != "":
+ trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs
+ trait.LRS_score_value = LRS_score_value = trait.lrs
+ else:
+ raise KeyError, `trait.name`+' information is not found in the database.'
+
+ return trait \ No newline at end of file
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 2f88f778..845a7224 100644
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -44,15 +44,15 @@ class webqtlCaseData(object):
def __repr__(self):
    """Return a short debugging summary of this sample's stored fields.

    Each of ``value``, ``variance``, ``num_cases``, ``name`` and ``name2``
    is appended to the ``<webqtlCaseData>`` tag only when it is set.
    A field counts as unset when it is ``None`` or an empty string.

    A plain truthiness test is deliberately not used here: a value,
    variance or case count of exactly zero is real data, and hiding it
    would make a zero measurement look the same as a missing one.
    """
    def is_set(field):
        # Only None / "" mean "missing"; 0 and 0.0 must still be shown.
        return field is not None and field != ""

    # Named `text` rather than `str` so the builtin is not shadowed.
    text = "<webqtlCaseData> "
    if is_set(self.value):
        text += "value=%2.3f" % self.value
    if is_set(self.variance):
        text += " variance=%2.3f" % self.variance
    if is_set(self.num_cases):
        text += " ndata=%d" % self.num_cases
    if is_set(self.name):
        text += " name=%s" % self.name
    if is_set(self.name2):
        text += " name2=%s" % self.name2
    return text