about | summary | refs | log | tree | commit | diff
path: root/wqflask/base
diff options
context:
space:
mode:
author: Zachary Sloan 2013-06-20 22:20:23 +0000
committer: Zachary Sloan 2013-06-20 22:20:23 +0000
commit: 4ffee373494170e708678039dca132f1bd729ab1 (patch)
tree: a9d6054380f5a9d612a4d1d88889f68ea4923a75 /wqflask/base
parent: 939058c4a3b668037974f2876b072c4be008da26 (diff)
parent: 52ac4b6e1c014801080cbbcad53df868058d2657 (diff)
download: genenetwork2-4ffee373494170e708678039dca132f1bd729ab1.tar.gz
Merge branch 'flask'
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x wqflask/base/data_set.py 420
-rw-r--r-- wqflask/base/generate_probesetfreeze_file.py 31
-rw-r--r-- wqflask/base/species.py 12
-rwxr-xr-x wqflask/base/trait.py 145
-rwxr-xr-x wqflask/base/webqtlConfig.py 9
-rwxr-xr-x wqflask/base/webqtlConfigLocal.py 16
6 files changed, 454 insertions, 179 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 50ef8f57..07fe9cd9 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -22,10 +22,14 @@
from __future__ import absolute_import, print_function, division
import os
+import math
+import string
+import collections
-from flask import Flask, g
+import json
+import itertools
-from htmlgen import HTMLgen2 as HT
+from flask import Flask, g
import reaper
@@ -33,6 +37,8 @@ from base import webqtlConfig
from base import species
from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
+from utility.benchmark import Bench
+from wqflask.my_pylmm.pyLMM import chunks
from MySQLdb import escape_string as escape
from pprint import pformat as pf
@@ -41,29 +47,102 @@ from pprint import pformat as pf
DS_NAME_MAP = {}
def create_dataset(dataset_name):
- #cursor = db_conn.cursor()
- print("dataset_name:", dataset_name)
+ #print("dataset_name:", dataset_name)
query = """
SELECT DBType.Name
FROM DBList, DBType
- WHERE DBList.Name = '%s' and
+ WHERE DBList.Name = '{}' and
DBType.Id = DBList.DBTypeId
- """ % (escape(dataset_name))
- print("query is: ", pf(query))
+ """.format(escape(dataset_name))
+ #print("query is: ", pf(query))
dataset_type = g.db.execute(query).fetchone().Name
#dataset_type = cursor.fetchone()[0]
- print("[blubber] dataset_type:", pf(dataset_type))
+ #print("[blubber] dataset_type:", pf(dataset_type))
dataset_ob = DS_NAME_MAP[dataset_type]
#dataset_class = getattr(data_set, dataset_ob)
- print("dataset_ob:", dataset_ob)
- print("DS_NAME_MAP:", pf(DS_NAME_MAP))
+ #print("dataset_ob:", dataset_ob)
+ #print("DS_NAME_MAP:", pf(DS_NAME_MAP))
dataset_class = globals()[dataset_ob]
return dataset_class(dataset_name)
+def create_in_clause(items):
+ """Create an in clause for mysql"""
+ in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+ in_clause = '( {} )'.format(in_clause)
+ return in_clause
+
+
+def mescape(*items):
+ """Multiple escape"""
+ escaped = [escape(str(item)) for item in items]
+ #print("escaped is:", escaped)
+ return escaped
+
+
+class Markers(object):
+ """Todo: Build in cacheing so it saves us reading the same file more than once"""
+ def __init__(self, name):
+ json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json'))
+ self.markers = json.load(json_data_fh)
+
+ def add_pvalues(self, p_values):
+ #print("length of self.markers:", len(self.markers))
+ #print("length of p_values:", len(p_values))
+
+ # THIS IS only needed for the case when we are limiting the number of p-values calculated
+ if len(self.markers) < len(p_values):
+ self.markers = self.markers[:len(p_values)]
+
+ for marker, p_value in itertools.izip(self.markers, p_values):
+ marker['p_value'] = p_value
+ print("p_value is:", marker['p_value'])
+ marker['lod_score'] = -math.log10(marker['p_value'])
+ #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+
+
+
+
+class HumanMarkers(Markers):
+
+ def __init__(self, name):
+ marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim'))
+ self.markers = []
+ for line in marker_data_fh:
+ splat = line.strip().split()
+ marker = {}
+ marker['chr'] = int(splat[0])
+ marker['name'] = splat[1]
+ marker['Mb'] = float(splat[3]) / 1000000
+ self.markers.append(marker)
+
+ #print("markers is: ", pf(self.markers))
+
+
+ def add_pvalues(self, p_values):
+ #for marker, p_value in itertools.izip(self.markers, p_values):
+ # if marker['Mb'] <= 0 and marker['chr'] == 0:
+ # continue
+ # marker['p_value'] = p_value
+ # print("p_value is:", marker['p_value'])
+ # marker['lod_score'] = -math.log10(marker['p_value'])
+ # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+
+ super(HumanMarkers, self).add_pvalues(p_values)
+
+ with Bench("deleting markers"):
+ markers = []
+ for marker in self.markers:
+ if not marker['Mb'] <= 0 and not marker['chr'] == 0:
+ markers.append(marker)
+ self.markers = markers
+
+
class DatasetGroup(object):
"""
@@ -79,22 +158,41 @@ class DatasetGroup(object):
if self.name == 'BXD300':
self.name = "BXD"
+ self.f1list = None
+ self.parlist = None
+ self.get_f1_parent_strains()
+ #print("parents/f1s: {}:{}".format(self.parlist, self.f1list))
+
self.species = webqtlDatabaseFunction.retrieve_species(self.name)
self.incparentsf1 = False
- self.f1list = None
- self.parlist = None
self.allsamples = None
+
+
+ def get_markers(self):
+ #print("self.species is:", self.species)
+ if self.species == "human":
+ marker_class = HumanMarkers
+ else:
+ marker_class = Markers
+ self.markers = marker_class(self.name)
+
- #def read_genotype(self):
- # self.read_genotype_file()
- #
- # if not self.genotype: # Didn'd succeed, so we try method 2
- # self.read_genotype_data()
+ def get_f1_parent_strains(self):
+ try:
+ # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py;
+ f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name]
+ except KeyError:
+ f1 = f12 = maternal = paternal = None
+
+ if f1 and f12:
+ self.f1list = [f1, f12]
+ if maternal and paternal:
+ self.parlist = [maternal, paternal]
def read_genotype_file(self):
- '''read genotype from .geno file instead of database'''
+ '''Read genotype from .geno file instead of database'''
#if self.group == 'BXD300':
# self.group = 'BXD'
#
@@ -104,38 +202,24 @@ class DatasetGroup(object):
#genotype_2 is Dataset Object with parents and f1 (not for intercross)
genotype_1 = reaper.Dataset()
-
+
# reaper barfs on unicode filenames, so here we ensure it's a string
full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
genotype_1.read(full_filename)
- print("Got to after read")
-
- try:
- # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py;
- f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name]
- except KeyError:
- f1 = f12 = maternal = paternal = None
-
-
- if genotype_1.type == "group" and maternal and paternal:
- genotype_2 = genotype_1.add(Mat=maternal, Pat=paternal) #, F1=_f1)
+ if genotype_1.type == "group" and self.parlist:
+ genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1)
else:
genotype_2 = genotype_1
#determine default genotype object
if self.incparentsf1 and genotype_1.type != "intercross":
- self.genotype = genotype_2
+ genotype = genotype_2
else:
self.incparentsf1 = 0
- self.genotype = genotype_1
-
- self.samplelist = list(self.genotype.prgy)
+ genotype = genotype_1
- if f1 and f12:
- self.f1list = [f1, f12]
- if maternal and paternal:
- self.parlist = [maternal, paternal]
+ self.samplelist = list(genotype.prgy)
class DataSet(object):
@@ -159,10 +243,10 @@ class DataSet(object):
self.retrieve_other_names()
self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype
+ self.group.read_genotype_file()
self.species = species.TheSpecies(self)
-
-
-
+
+
def get_desc(self):
"""Gets overridden later, at least for Temp...used by trait's get_given_name"""
return None
@@ -209,14 +293,14 @@ class DataSet(object):
self.name,
self.name,
self.name))
- print("query_args are:", query_args)
+ #print("query_args are:", query_args)
- print("""
- SELECT Id, Name, FullName, ShortName
- FROM %s
- WHERE public > %s AND
- (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
- """ % (query_args))
+ #print("""
+ # SELECT Id, Name, FullName, ShortName
+ # FROM %s
+ # WHERE public > %s AND
+ # (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
+ # """ % (query_args))
self.id, self.name, self.fullname, self.shortname = g.db.execute("""
SELECT Id, Name, FullName, ShortName
@@ -227,11 +311,7 @@ class DataSet(object):
#self.cursor.execute(query)
#self.id, self.name, self.fullname, self.shortname = self.cursor.fetchone()
-
-
- #def genHTML(self, Class='c0dd'):
- # return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class),
- # url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank")
+
class PhenotypeDataSet(DataSet):
DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
@@ -291,6 +371,19 @@ class PhenotypeDataSet(DataSet):
# (Urgently?) Need to write this
pass
+ def get_trait_list(self):
+ query = """
+ select PublishXRef.Id
+ from PublishXRef, PublishFreeze
+ where PublishFreeze.InbredSetId=PublishXRef.InbredSetId
+ and PublishFreeze.Id = {}
+ """.format(escape(str(self.id)))
+ results = g.db.execute(query).fetchall()
+ trait_data = {}
+ for trait in results:
+ trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+ return trait_data
+
def get_trait_info(self, trait_list, species = ''):
for this_trait in trait_list:
if not this_trait.haveinfo:
@@ -301,7 +394,7 @@ class PhenotypeDataSet(DataSet):
continue # for now
if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users):
description = this_trait.pre_publication_description
- this_trait.description_display = description
+ this_trait.description_display = unicode(description, "utf8")
if not this_trait.year.isdigit():
this_trait.pubmed_text = "N/A"
@@ -359,7 +452,7 @@ class PhenotypeDataSet(DataSet):
PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (trait.name, self.id)
+ """ % (trait, self.id)
results = g.db.execute(query).fetchall()
return results
@@ -399,6 +492,19 @@ class GenotypeDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
+
+ def get_trait_list(self):
+ query = """
+ select Geno.Name
+ from Geno, GenoXRef
+ where GenoXRef.GenoId = Geno.Id
+ and GenoFreezeId = {}
+ """.format(escape(str(self.id)))
+ results = g.db.execute(query).fetchall()
+ trait_data = {}
+ for trait in results:
+ trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+ return trait_data
def get_trait_info(self, trait_list, species=None):
for this_trait in trait_list:
@@ -437,7 +543,7 @@ class GenotypeDataSet(DataSet):
GenoData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait.name, self.name)
+ """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)
results = g.db.execute(query).fetchall()
return results
@@ -509,10 +615,95 @@ class MrnaAssayDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
+
+ def get_trait_list_1(self):
+ query = """
+ select ProbeSet.Name
+ from ProbeSet, ProbeSetXRef
+ where ProbeSetXRef.ProbeSetId = ProbeSet.Id
+ and ProbeSetFreezeId = {}
+ """.format(escape(str(self.id)))
+ results = g.db.execute(query).fetchall()
+ #print("After get_trait_list query")
+ trait_data = {}
+ for trait in results:
+ print("Retrieving sample_data for ", trait[0])
+ trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
+ #print("After retrieve_sample_data")
+ return trait_data
+
+ def get_trait_data(self):
+ self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
+ query = """
+ SELECT Strain.Name, Strain.Id FROM Strain, Species
+ WHERE Strain.Name IN {}
+ and Strain.SpeciesId=Species.Id
+ and Species.name = '{}'
+ """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
+ results = dict(g.db.execute(query).fetchall())
+ sample_ids = [results[item] for item in self.samplelist]
+
+ # MySQL limits the number of tables that can be used in a join to 61,
+ # so we break the sample ids into smaller chunks
+ # Postgres doesn't have that limit, so we can get rid of this after we transition
+ chunk_size = 50
+ number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
+ trait_sample_data = []
+ for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
+
+ #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
+ #tempTable = None
+ #if GeneId and db.type == "ProbeSet":
+ # if method == "3":
+ # tempTable = self.getTempLiteratureTable(species=species,
+ # input_species_geneid=GeneId,
+ # returnNumber=returnNumber)
+ #
+ # if method == "4" or method == "5":
+ # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
+ # TissueProbeSetFreezeId=tissueProbeSetFreezeId,
+ # method=method,
+ # returnNumber=returnNumber)
+
+ temp = ['T%s.value' % item for item in sample_ids_step]
+ query = "SELECT {}.Name,".format(escape(self.type))
+ data_start_pos = 1
+ query += string.join(temp, ', ')
+ query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
+ self.type,
+ self.type))
+
+ for item in sample_ids_step:
+ query += """
+ left join {}Data as T{} on T{}.Id = {}XRef.DataId
+ and T{}.StrainId={}\n
+ """.format(*mescape(self.type, item, item, self.type, item, item))
+
+ query += """
+ WHERE {}XRef.{}FreezeId = {}Freeze.Id
+ and {}Freeze.Name = '{}'
+ and {}.Id = {}XRef.{}Id
+ order by {}.Id
+ """.format(*mescape(self.type, self.type, self.type, self.type,
+ self.name, self.type, self.type, self.type, self.type))
+ results = g.db.execute(query).fetchall()
+ trait_sample_data.append(results)
+
+ trait_count = len(trait_sample_data[0])
+ self.trait_data = collections.defaultdict(list)
+
+ # put all of the separate data together into a dictionary where the keys are
+ # trait names and values are lists of sample values
+ for trait_counter in range(trait_count):
+ trait_name = trait_sample_data[0][trait_counter][0]
+ for chunk_counter in range(int(number_chunks)):
+ self.trait_data[trait_name] += (
+ trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
+
def get_trait_info(self, trait_list=None, species=''):
- # Note: setting trait_list to [] is probably not a great idea.
+ # Note: setting trait_list to [] is probably not a great idea.
if not trait_list:
trait_list = []
@@ -521,9 +712,7 @@ class MrnaAssayDataSet(DataSet):
if not this_trait.haveinfo:
this_trait.retrieveInfo(QTL=1)
- if this_trait.symbol:
- pass
- else:
+ if not this_trait.symbol:
this_trait.symbol = "N/A"
#XZ, 12/08/2008: description
@@ -531,60 +720,56 @@ class MrnaAssayDataSet(DataSet):
description_string = str(this_trait.description).strip()
target_string = str(this_trait.probe_target_description).strip()
- description_display = ''
-
if len(description_string) > 1 and description_string != 'None':
description_display = description_string
else:
description_display = this_trait.symbol
- if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None':
+ if (len(description_display) > 1 and description_display != 'N/A' and
+ len(target_string) > 1 and target_string != 'None'):
description_display = description_display + '; ' + target_string.strip()
# Save it for the jinja2 template
this_trait.description_display = description_display
- #print(" xxxxdd [%s]: %s" % (type(this_trait.description_display), description_display))
#XZ: trait_location_value is used for sorting
trait_location_repr = 'N/A'
trait_location_value = 1000000
if this_trait.chr and this_trait.mb:
- try:
- trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
- except:
- if this_trait.chr.upper() == 'X':
- trait_location_value = 20*1000 + this_trait.mb
- else:
- trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
-
- this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) )
+ #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
+ #This is so we can convert the location to a number used for sorting
+ trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb)
+ #try:
+ # trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
+ #except ValueError:
+ # if this_trait.chr.upper() == 'X':
+ # trait_location_value = 20*1000 + this_trait.mb
+ # else:
+ # trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 +
+ # this_trait.mb)
+
+ #ZS: Put this in function currently called "convert_location_to_value"
+ this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr,
+ float(this_trait.mb))
this_trait.location_value = trait_location_value
- #this_trait.trait_location_value = trait_location_value
- #XZ, 01/12/08: This SQL query is much faster.
+ #Get mean expression value
query = (
-"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
- where ProbeSetXRef.ProbeSetFreezeId = %s and
- ProbeSet.Id = ProbeSetXRef.ProbeSetId and
- ProbeSet.Name = '%s'
+ """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
+ where ProbeSetXRef.ProbeSetFreezeId = %s and
+ ProbeSet.Id = ProbeSetXRef.ProbeSetId and
+ ProbeSet.Name = '%s'
""" % (escape(str(this_trait.dataset.id)),
escape(this_trait.name)))
- print("query is:", pf(query))
+ #print("query is:", pf(query))
result = g.db.execute(query).fetchone()
+
+ mean = result[0] if result else 0
- if result:
- if result[0]:
- mean = result[0]
- else:
- mean=0
- else:
- mean = 0
-
- #XZ, 06/05/2009: It is neccessary to turn on nowrap
- this_trait.mean = repr = "%2.3f" % mean
+ this_trait.mean = "%2.3f" % mean
#LRS and its location
this_trait.LRS_score_repr = 'N/A'
@@ -603,23 +788,39 @@ class MrnaAssayDataSet(DataSet):
result = self.cursor.fetchone()
if result:
- if result[0] and result[1]:
- LRS_Chr = result[0]
- LRS_Mb = result[1]
-
- #XZ: LRS_location_value is used for sorting
- try:
- LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
- except:
- if LRS_Chr.upper() == 'X':
- LRS_location_value = 20*1000 + float(LRS_Mb)
- else:
- LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
+ #if result[0] and result[1]:
+ # lrs_chr = result[0]
+ # lrs_mb = result[1]
+ lrs_chr, lrs_mb = result
+ #XZ: LRS_location_value is used for sorting
+ lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb)
+
+ #try:
+ # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb)
+ #except:
+ # if lrs_chr.upper() == 'X':
+ # lrs_location_value = 20*1000 + float(lrs_mb)
+ # else:
+ # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 +
+ # float(lrs_mb))
+
+ this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
+ this_trait.LRS_score_value = this_trait.lrs
+ this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
+
+
+ def convert_location_to_value(self, chromosome, mb):
+ try:
+ location_value = int(chromosome)*1000 + float(mb)
+ except ValueError:
+ if chromosome.upper() == 'X':
+ location_value = 20*1000 + float(mb)
+ else:
+ location_value = (ord(str(chromosome).upper()[0])*1000 +
+ float(mb))
+
+ return location_value
- this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
- this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
- this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) )
-
def get_sequence(self):
query = """
SELECT
@@ -633,9 +834,9 @@ class MrnaAssayDataSet(DataSet):
ProbeSetFreeze.Name = %s
""" % (escape(self.name), escape(self.dataset.name))
results = g.db.execute(query).fetchone()
-
return results[0]
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -652,7 +853,7 @@ class MrnaAssayDataSet(DataSet):
ProbeSetData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (escape(trait.name), escape(self.name))
+ """ % (escape(trait), escape(self.name))
results = g.db.execute(query).fetchall()
return results
@@ -725,7 +926,7 @@ class TempDataSet(DataSet):
def geno_mrna_confidentiality(ob):
dataset_table = ob.type + "Freeze"
- print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
+ #print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
query = '''SELECT Id, Name, FullName, confidentiality,
AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table)
@@ -741,3 +942,4 @@ def geno_mrna_confidentiality(ob):
if confidential:
# Allow confidential data later
NoConfindetialDataForYouTodaySorry
+
diff --git a/wqflask/base/generate_probesetfreeze_file.py b/wqflask/base/generate_probesetfreeze_file.py
new file mode 100644
index 00000000..a0ff804b
--- /dev/null
+++ b/wqflask/base/generate_probesetfreeze_file.py
@@ -0,0 +1,31 @@
+from __future__ import absolute_import, print_function, division
+import os
+import math
+
+import json
+import itertools
+
+from flask import Flask, g
+
+from base import webqtlConfig
+from dbFunction import webqtlDatabaseFunction
+from utility import webqtlUtil
+
+from MySQLdb import escape_string as escape
+from pprint import pformat as pf
+
+
+query = """ select ProbeSet.Name
+ from ProbeSetXRef,
+ ProbeSetFreeze,
+ ProbeSet
+ where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and
+ ProbeSetFreeze.Name = "EPFLMouseMuscleCDRMA1211" and
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id;
+ """
+
+markers = g.db.execute(query).fetchall()
+print("markers: ", pf(markers))
+
+if __name__ == '__main__':
+ main() \ No newline at end of file
diff --git a/wqflask/base/species.py b/wqflask/base/species.py
index 9d4cac4c..191f4535 100644
--- a/wqflask/base/species.py
+++ b/wqflask/base/species.py
@@ -16,8 +16,7 @@ class TheSpecies(object):
print("self.dataset is:", pf(self.dataset.__dict__))
self.chromosomes = Chromosomes(self.dataset)
self.genome_mb_length = self.chromosomes.get_genome_mb_length()
-
-
+
#@property
#def chromosomes(self):
# chromosomes = [("All", -1)]
@@ -31,7 +30,8 @@ class TheSpecies(object):
# return chromosomes
class IndChromosome(object):
- def __init__(self, length):
+ def __init__(self, name, length):
+ self.name = name
self.length = length
@property
@@ -50,7 +50,7 @@ class Chromosomes(object):
results = g.db.execute("""
Select
- Chr_Length.Name, Length from Chr_Length, InbredSet
+ Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
where
Chr_Length.SpeciesId = InbredSet.SpeciesId AND
InbredSet.Name = %s
@@ -59,10 +59,10 @@ class Chromosomes(object):
print("bike:", results)
for item in results:
- self.chromosomes[item.Name] = IndChromosome(item.Length)
+ self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
self.set_mb_graph_interval()
- self.get_cm_length_list()
+ #self.get_cm_length_list()
def set_mb_graph_interval(self):
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 241bf2ab..db76ddea 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -1,6 +1,8 @@
from __future__ import absolute_import, division, print_function
import string
+import resource
+
from htmlgen import HTMLgen2 as HT
@@ -15,22 +17,38 @@ from pprint import pformat as pf
from flask import Flask, g
-class GeneralTrait:
+def print_mem(stage=""):
+ mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+ print("{}: {}".format(stage, mem/1024))
+
+class GeneralTrait(object):
"""
Trait class defines a trait in webqtl, can be either Microarray,
Published phenotype, genotype, or user input trait
"""
- def __init__(self, **kw):
- print("in GeneralTrait")
- self.dataset = kw.get('dataset') # database name
+ def __init__(self, get_qtl_info=False, **kw):
+ # xor assertion
+ assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
+ if kw.get('dataset_name'):
+ self.dataset = create_dataset(kw.get('dataset_name'))
+ else:
+ self.dataset = kw.get('dataset')
self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet
self.data = kw.get('data', {})
+
+ # Sets defaultst
+ self.locus = None
+ self.lrs = None
+ self.pvalue = None
+ self.mean = None
+ self.num_overlap = None
+
if kw.get('fullname'):
name2 = value.split("::")
@@ -39,13 +57,12 @@ class GeneralTrait:
# self.cellid is set to None above
elif len(name2) == 3:
self.dataset, self.name, self.cellid = name2
-
- self.dataset = create_dataset(self.dataset)
# Todo: These two lines are necessary most of the time, but perhaps not all of the time
# So we could add a simple if statement to short-circuit this if necessary
- self.retrieve_info()
+ self.retrieve_info(get_qtl_info=get_qtl_info)
self.retrieve_sample_data()
+
def get_name(self):
@@ -78,7 +95,7 @@ class GeneralTrait:
#desc = self.handle_pca(desc)
stringy = desc
return stringy
-
+
def display_name(self):
@@ -208,7 +225,7 @@ class GeneralTrait:
# ''' % (self.cellid, self.name, self.dataset.name)
#
#else:
- results = self.dataset.retrieve_sample_data(self)
+ results = self.dataset.retrieve_sample_data(self.name)
# Todo: is this necessary? If not remove
self.data.clear()
@@ -229,7 +246,7 @@ class GeneralTrait:
#def items(self):
# return self.__dict__.items()
- def retrieve_info(self, QTL=False):
+ def retrieve_info(self, get_qtl_info=False):
assert self.dataset, "Dataset doesn't exist"
if self.dataset.type == 'Publish':
query = """
@@ -251,7 +268,7 @@ class GeneralTrait:
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishFreeze.Id = %s
""" % (self.name, self.dataset.id)
- traitInfo = g.db.execute(query).fetchone()
+ trait_info = g.db.execute(query).fetchone()
#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
#XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
elif self.dataset.type == 'ProbeSet':
@@ -268,8 +285,8 @@ class GeneralTrait:
""" % (escape(display_fields_string),
escape(self.dataset.name),
escape(self.name))
- traitInfo = g.db.execute(query).fetchone()
- print("traitInfo is: ", pf(traitInfo))
+ trait_info = g.db.execute(query).fetchone()
+ #print("trait_info is: ", pf(trait_info))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.dataset.type == 'Geno':
@@ -286,23 +303,24 @@ class GeneralTrait:
""" % (escape(display_fields_string),
escape(self.dataset.name),
escape(self.name))
- traitInfo = g.db.execute(query).fetchone()
- print("traitInfo is: ", pf(traitInfo))
+ trait_info = g.db.execute(query).fetchone()
+ #print("trait_info is: ", pf(trait_info))
else: #Temp type
query = """SELECT %s FROM %s WHERE Name = %s
""" % (string.join(self.dataset.display_fields,','),
self.dataset.type, self.name)
- traitInfo = g.db.execute(query).fetchone()
+ trait_info = g.db.execute(query).fetchone()
#self.cursor.execute(query)
- #traitInfo = self.cursor.fetchone()
- if traitInfo:
+ #trait_info = self.cursor.fetchone()
+ if trait_info:
self.haveinfo = True
#XZ: assign SQL query result to trait attributes.
for i, field in enumerate(self.dataset.display_fields):
- setattr(self, field, traitInfo[i])
+ print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i]))
+ setattr(self, field, trait_info[i])
if self.dataset.type == 'Publish':
self.confidential = 0
@@ -310,55 +328,76 @@ class GeneralTrait:
self.confidential = 1
self.homologeneid = None
+
+ print("self.geneid is:", self.geneid)
+ print(" type:", type(self.geneid))
+ print("self.dataset.group.name is:", self.dataset.group.name)
if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid:
#XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number.
#XZ: So I have to test if geneid is number before execute the query.
#XZ: The geneid values in database should be cleaned up.
- try:
- junk = float(self.geneid)
- geneidIsNumber = 1
- except:
- geneidIsNumber = 0
-
- if geneidIsNumber:
- query = """
- SELECT
- HomologeneId
- FROM
- Homologene, Species, InbredSet
- WHERE
- Homologene.GeneId =%s AND
- InbredSet.Name = '%s' AND
- InbredSet.SpeciesId = Species.Id AND
- Species.TaxonomyId = Homologene.TaxonomyId
- """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
- result = g.db.execute(query).fetchone()
- else:
- result = None
+ #try:
+ # float(self.geneid)
+ # geneidIsNumber = True
+ #except ValueError:
+ # geneidIsNumber = False
+
+ #if geneidIsNumber:
+
+
+ query = """
+ SELECT
+ HomologeneId
+ FROM
+ Homologene, Species, InbredSet
+ WHERE
+ Homologene.GeneId =%s AND
+ InbredSet.Name = '%s' AND
+ InbredSet.SpeciesId = Species.Id AND
+ Species.TaxonomyId = Homologene.TaxonomyId
+ """ % (escape(str(self.geneid)), escape(self.dataset.group.name))
+ print("-> query is:", query)
+ result = g.db.execute(query).fetchone()
+ #else:
+ # result = None
if result:
self.homologeneid = result[0]
- if QTL:
+ if get_qtl_info:
if self.dataset.type == 'ProbeSet' and not self.cellid:
- traitQTL = g.db.execute("""
+ query = """
SELECT
ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean
FROM
ProbeSetXRef, ProbeSet
WHERE
ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSet.Name = "%s" AND
- ProbeSetXRef.ProbeSetFreezeId =%s
- """, (self.name, self.dataset.id)).fetchone()
+ ProbeSet.Name = "{}" AND
+ ProbeSetXRef.ProbeSetFreezeId ={}
+ """.format(self.name, self.dataset.id)
+ trait_qtl = g.db.execute(query).fetchone()
#self.cursor.execute(query)
- #traitQTL = self.cursor.fetchone()
- if traitQTL:
- self.locus, self.lrs, self.pvalue, self.mean = traitQTL
+ #trait_qtl = self.cursor.fetchone()
+ if trait_qtl:
+ self.locus, self.lrs, self.pvalue, self.mean = trait_qtl
+ if self.locus:
+ query = """
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = '{}' and
+ Geno.Name = '{}' and
+ Geno.SpeciesId = Species.Id
+ """.format(self.dataset.group.species, self.locus)
+ print("query is:", query)
+ result = g.db.execute(query).fetchone()
+ self.locus_chr = result[0]
+ self.locus_mb = result[1]
else:
- self.locus = self.lrs = self.pvalue = self.mean = ""
+ self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = ""
+
+
if self.dataset.type == 'Publish':
- traitQTL = g.db.execute("""
+ trait_qtl = g.db.execute("""
SELECT
PublishXRef.Locus, PublishXRef.LRS
FROM
@@ -369,9 +408,9 @@ class GeneralTrait:
PublishFreeze.Id =%s
""", (self.name, self.dataset.id)).fetchone()
#self.cursor.execute(query)
- #traitQTL = self.cursor.fetchone()
- if traitQTL:
- self.locus, self.lrs = traitQTL
+ #trait_qtl = self.cursor.fetchone()
+ if trait_qtl:
+ self.locus, self.lrs = trait_qtl
else:
self.locus = self.lrs = ""
else:
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 755595e0..a811c3cd 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -52,19 +52,22 @@ ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?sp
SECUREDIR = GNROOT + 'secure/'
COMMON_LIB = GNROOT + 'support/admin'
HTMLPATH = GNROOT + 'web/'
+PYLMM_PATH = '/home/zas1024/'
+SNP_PATH = '/mnt/xvdf1/snps/'
IMGDIR = HTMLPATH +'image/'
IMAGESPATH = HTMLPATH + 'images/'
UPLOADPATH = IMAGESPATH + 'upload/'
-TMPDIR = '/tmp/'
+TMPDIR = HTMLPATH + 'tmp/'
GENODIR = HTMLPATH + 'genotypes/'
+NEWGENODIR = HTMLPATH + 'new_genotypes/'
GENO_ARCHIVE_DIR = GENODIR + 'archive/'
TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/'
CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/'
ChangableHtmlPath = GNROOT + 'web/'
SITENAME = 'GN'
-PORTADDR = "http://132.192.47.32"
-BASEHREF = '<base href="http://132.192.47.32/">'
+PORTADDR = "http://50.16.251.170"
+BASEHREF = '<base href="http://50.16.251.170/">'
INFOPAGEHREF = '/dbdoc/%s.html'
GLOSSARYFILE = "/glossary.html"
CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py
index 84686234..abaeff93 100755
--- a/wqflask/base/webqtlConfigLocal.py
+++ b/wqflask/base/webqtlConfigLocal.py
@@ -2,18 +2,18 @@
# Environment Variables - private
#########################################
-MYSQL_SERVER = 'localhost'
-DB_NAME = 'db_webqtl_zas1024'
+MYSQL_SERVER = 'gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com'
+DB_NAME = 'db_webqtl'
DB_USER = 'webqtl'
-DB_PASSWD = 'webqtl'
+DB_PASSWD = 'f2ZypIflRM'
-MYSQL_UPDSERVER = 'localhost'
-DB_UPDNAME = 'db_webqtl_zas1024'
+MYSQL_UPDSERVER = 'gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com'
+DB_UPDNAME = 'db_webqtl'
DB_UPDUSER = 'webqtl'
-DB_UPDPASSWD = 'webqtl'
+DB_UPDPASSWD = 'f2ZypIflRM'
-GNROOT = '/home/zas1024/gn/'
-ROOT_URL = 'http://alexandria.uthsc.edu:91/'
+GNROOT = '/home/zas1024/gene/'
+ROOT_URL = 'http://50.16.251.170'
PythonPath = '/usr/bin/python'
PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/'