author    zsloan 2016-05-17 16:04:11 +0000
committer zsloan 2016-05-17 16:04:11 +0000
commit    f7520d9a6e05b103bab983c31ef0e53fad59f5e6 (patch)
tree      a1ba729c38a48806ca43540a960d655aaa9989e9 /wqflask/base
parent    2dfc56250714cb494eb7f3072b1e7cae18edace4 (diff)
parent    04afa563e6d53fe2a91ac2e6eb4af2f2fa5d5c3b (diff)
download  genenetwork2-f7520d9a6e05b103bab983c31ef0e53fad59f5e6.tar.gz
Merge branch 'staging' of github.com:genenetwork/genenetwork2 into development
Diffstat (limited to 'wqflask/base')
-rw-r--r-- [-rwxr-xr-x]   wqflask/base/data_set.py        401
-rwxr-xr-x                wqflask/base/webqtlConfig.py     79
-rwxr-xr-x                wqflask/base/webqtlFormData.py    2
3 files changed, 160 insertions(+), 322 deletions(-)
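The thread running through this merge is path handling: hard-coded webqtlConfig path constants (GENODIR, NEWGENODIR, PYLMM_PATH) give way to lookups through utility.tools. Only the call sites appear in the diff, so here is a minimal sketch of how the helpers appear to behave, inferred from those call sites; the base-directory environment variable and the exact error handling are assumptions, not the real wqflask/utility/tools.py:

import os

def flat_files(subdir):
    # Assumed layout: one flat-file root with per-kind subdirectories
    # ('genotype', 'mapping', ...); the env variable name is illustrative.
    base = os.environ.get('GENENETWORK_FILES', '/usr/local/genenetwork/files')
    return os.path.join(base, subdir)

def locate(name, subdir):
    # Full path of name under subdir; fails hard when the file is missing
    # (read_genotype_file below drops its try/except and relies on that).
    path = os.path.join(flat_files(subdir), name)
    if os.path.isfile(path):
        return path
    raise IOError("Cannot locate " + path)

def locate_ignore_error(name, subdir):
    # Like locate(), but returns None on a miss so callers can fall back
    # (see the .fam/.geno probing in DatasetGroup.get_samplelist below).
    path = os.path.join(flat_files(subdir), name)
    if os.path.isfile(path):
        return path
    return None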
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index e37a838f..053b45fc 100755..100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -44,6 +44,7 @@ from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
from utility import chunks
+from utility.tools import locate, locate_ignore_error
from maintenance import get_group_samplelists
@@ -57,40 +58,26 @@ DS_NAME_MAP = {}
def create_dataset(dataset_name, dataset_type = None, get_samplelist = True):
if not dataset_type:
dataset_type = Dataset_Getter(dataset_name)
- #dataset_type = get_dataset_type_from_json(dataset_name)
print("dataset_type is:", dataset_type)
- #query = """
- # SELECT DBType.Name
- # FROM DBList, DBType
- # WHERE DBList.Name = '{}' and
- # DBType.Id = DBList.DBTypeId
- # """.format(escape(dataset_name))
- #dataset_type = g.db.execute(query).fetchone().Name
-
dataset_ob = DS_NAME_MAP[dataset_type]
dataset_class = globals()[dataset_ob]
return dataset_class(dataset_name, get_samplelist)
-
-#def get_dataset_type_from_json(dataset_name):
-
class Dataset_Types(object):
-
+
def __init__(self):
self.datasets = {}
file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
with open(file_name, 'r') as fh:
data = json.load(fh)
-
+
print("*" * 70)
for species in data['datasets']:
for group in data['datasets'][species]:
for dataset_type in data['datasets'][species][group]:
for dataset in data['datasets'][species][group][dataset_type]:
- #print("dataset is:", dataset)
-
short_dataset_name = dataset[1]
if dataset_type == "Phenotypes":
new_type = "Publish"
@@ -99,32 +86,28 @@ class Dataset_Types(object):
else:
new_type = "ProbeSet"
self.datasets[short_dataset_name] = new_type
-
+
def __call__(self, name):
return self.datasets[name]
-
+
# Do the intensive work at startup one time only
Dataset_Getter = Dataset_Types()
-#
-#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611"))
-
-
def create_datasets_list():
key = "all_datasets"
result = Redis.get(key)
-
+
if result:
print("Cache hit!!!")
datasets = pickle.loads(result)
-
+
else:
datasets = list()
with Bench("Creating DataSets object"):
type_dict = {'Publish': 'PublishFreeze',
'ProbeSet': 'ProbeSetFreeze',
'Geno': 'GenoFreeze'}
-
+
for dataset_type in type_dict:
query = "SELECT Name FROM {}".format(type_dict[dataset_type])
for result in g.db.execute(query).fetchall():
@@ -133,10 +116,10 @@ def create_datasets_list():
#print("type: {}\tname: {}".format(dataset_type, result.Name))
dataset = create_dataset(result.Name, dataset_type)
datasets.append(dataset)
-
+
Redis.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL))
Redis.expire(key, 60*60)
-
+
return datasets
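create_datasets_list() above is a textbook cache-aside: try Redis, rebuild on a miss, store the pickle with a one-hour TTL. The same pattern in isolation (plain redis-py shown as an assumption; GN2 imports its own preconfigured Redis handle):

import pickle
import redis

r = redis.StrictRedis()

def cache_aside(key, build, ttl=60 * 60):
    # Return the cached object under key, rebuilding via build() on a miss.
    raw = r.get(key)
    if raw:
        return pickle.loads(raw)   # cache hit, as in the branch above
    value = build()                # e.g. the dataset-building loop above
    r.set(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
    r.expire(key, ttl)
    return value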
@@ -157,30 +140,30 @@ def mescape(*items):
class Markers(object):
"""Todo: Build in cacheing so it saves us reading the same file more than once"""
def __init__(self, name):
- json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json'))
+ json_data_fh = open(locate(name + '.json','genotype/json'))
try:
markers = json.load(json_data_fh)
except:
markers = []
-
+
for marker in markers:
if (marker['chr'] != "X") and (marker['chr'] != "Y"):
marker['chr'] = int(marker['chr'])
marker['Mb'] = float(marker['Mb'])
-
+
self.markers = markers
#print("self.markers:", self.markers)
-
-
+
+
def add_pvalues(self, p_values):
print("length of self.markers:", len(self.markers))
print("length of p_values:", len(p_values))
-
+
if type(p_values) is list:
# THIS IS only needed for the case when we are limiting the number of p-values calculated
#if len(self.markers) > len(p_values):
# self.markers = self.markers[:len(p_values)]
-
+
for marker, p_value in itertools.izip(self.markers, p_values):
if not p_value:
continue
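add_pvalues() pairs each marker with its p-value (itertools.izip, so Python 2 throughout) and, per the commented-out HumanMarkers copy further down, derives LOD = -log10(p) and LRS ≈ 4.61·LOD; 4.61 is 2·ln(10), the usual LOD-to-LRS factor. A worked instance:

import math

p_value = 0.001
lod_score = -math.log10(p_value)   # 3.0
lrs_value = lod_score * 4.61       # 13.83, since LRS = 2*ln(10)*LOD and 2*ln(10) = 4.605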
@@ -213,18 +196,11 @@ class Markers(object):
#self.markers.remove(marker)
#del self.markers[i]
self.markers = filtered_markers
-
-
- #for i, marker in enumerate(self.markers):
- # if not 'p_value' in marker:
- # #print("self.markers[i]", self.markers[i])
- # del self.markers[i]
- # #self.markers.remove(self.markers[i])
class HumanMarkers(Markers):
-
+
def __init__(self, name, specified_markers = []):
- marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim'))
+ marker_data_fh = open(locate('genotype') + '/' + name + '.bim')
self.markers = []
for line in marker_data_fh:
splat = line.strip().split()
@@ -243,39 +219,21 @@ class HumanMarkers(Markers):
marker['name'] = splat[1]
marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
-
+
#print("markers is: ", pf(self.markers))
def add_pvalues(self, p_values):
- #for marker, p_value in itertools.izip(self.markers, p_values):
- # if marker['Mb'] <= 0 and marker['chr'] == 0:
- # continue
- # marker['p_value'] = p_value
- # print("p_value is:", marker['p_value'])
- # marker['lod_score'] = -math.log10(marker['p_value'])
- # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
- # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
-
- #print("p_values2:", pf(p_values))
super(HumanMarkers, self).add_pvalues(p_values)
-
- #with Bench("deleting markers"):
- # markers = []
- # for marker in self.markers:
- # if not marker['Mb'] <= 0 and not marker['chr'] == 0:
- # markers.append(marker)
- # self.markers = markers
-
-
+
class DatasetGroup(object):
"""
Each group has multiple datasets; each species has multiple groups.
-
+
For example, Mouse has multiple groups (BXD, BXA, etc), and each group
has multiple datasets associated with it.
-
+
"""
def __init__(self, dataset):
"""This sets self.group and self.group_id"""
@@ -283,14 +241,14 @@ class DatasetGroup(object):
self.name, self.id = g.db.execute(dataset.query_for_group).fetchone()
if self.name == 'BXD300':
self.name = "BXD"
-
+
self.f1list = None
self.parlist = None
self.get_f1_parent_strains()
#print("parents/f1s: {}:{}".format(self.parlist, self.f1list))
-
+
self.species = webqtlDatabaseFunction.retrieve_species(self.name)
-
+
self.incparentsf1 = False
self.allsamples = None
self._datasets = None
@@ -301,7 +259,7 @@ class DatasetGroup(object):
def get_markers(self):
#print("self.species is:", self.species)
if self.species == "human":
- marker_class = HumanMarkers
+ marker_class = HumanMarkers
else:
marker_class = Markers
@@ -310,12 +268,6 @@ class DatasetGroup(object):
def datasets(self):
key = "group_dataset_menu:v2:" + self.name
print("key is2:", key)
- #with Bench("Loading cache"):
- # result = Redis.get(key)
- #if result:
- # self._datasets = pickle.loads(result)
- # return self._datasets
-
dataset_menu = []
print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH)
print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH))
@@ -355,7 +307,7 @@ class DatasetGroup(object):
dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)]))
else:
dataset_sub_menu = [item[1:] for item in dataset]
-
+
tissue_already_exists = False
tissue_position = None
for i, tissue_dict in enumerate(dataset_menu):
@@ -383,7 +335,7 @@ class DatasetGroup(object):
f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name]
except KeyError:
f1 = f12 = maternal = paternal = None
-
+
if f1 and f12:
self.f1list = [f1, f12]
if maternal and paternal:
@@ -402,21 +354,17 @@ class DatasetGroup(object):
#print(" type: ", type(self.samplelist))
#print(" self.samplelist: ", self.samplelist)
else:
- #print("Cache not hit")
-
- from utility.tools import plink_command
- PLINK_PATH,PLINK_COMMAND = plink_command()
-
- geno_file_path = webqtlConfig.GENODIR+self.name+".geno"
- plink_file_path = PLINK_PATH+"/"+self.name+".fam"
-
- if os.path.isfile(plink_file_path):
- self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path)
- elif os.path.isfile(geno_file_path):
- self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
+ print("Cache not hit")
+
+ genotype_fn = locate_ignore_error(self.name+".geno",'genotype')
+ mapping_fn = locate_ignore_error(self.name+".fam",'mapping')
+ if mapping_fn:
+ self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn)
+ elif genotype_fn:
+ self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn)
else:
self.samplelist = None
- #print("after get_samplelist")
+ print("Sample list: ",self.samplelist)
Redis.set(key, json.dumps(self.samplelist))
Redis.expire(key, 60*5)
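The rewritten miss branch above drops the PLINK_PATH/GENODIR probing in favour of locate_ignore_error, preferring a plink .fam mapping file over a .geno file and caching whatever it finds in Redis for five minutes. The lookup order pulled out as a standalone sketch (locate_ignore_error returning None on a miss is the assumption from the sketch after the diffstat):

def resolve_samplelist(group_name):
    # Prefer plink's .fam mapping file; fall back to the .geno file.
    mapping_fn = locate_ignore_error(group_name + ".fam", "mapping")
    if mapping_fn:
        return get_group_samplelists.get_samplelist("plink", mapping_fn)
    genotype_fn = locate_ignore_error(group_name + ".geno", "genotype")
    if genotype_fn:
        return get_group_samplelists.get_samplelist("geno", genotype_fn)
    return None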
@@ -428,30 +376,14 @@ class DatasetGroup(object):
def read_genotype_file(self):
'''Read genotype from .geno file instead of database'''
- #if self.group == 'BXD300':
- # self.group = 'BXD'
- #
- #assert self.group, "self.group needs to be set"
-
#genotype_1 is Dataset Object without parents and f1
#genotype_2 is Dataset Object with parents and f1 (not for intercross)
genotype_1 = reaper.Dataset()
# reaper barfs on unicode filenames, so here we ensure it's a string
- full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
- if os.path.isfile(full_filename):
- #print("Reading file: ", full_filename)
- genotype_1.read(full_filename)
- #print("File read")
- else:
- try:
- full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno'))
- #print("Reading file")
- genotype_1.read(full_filename)
- #print("File read")
- except IOError:
- print("File doesn't exist!")
+ full_filename = str(locate(self.name+'.geno','genotype'))
+ genotype_1.read(full_filename)
if genotype_1.type == "group" and self.parlist:
genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1)
@@ -460,39 +392,15 @@ class DatasetGroup(object):
#determine default genotype object
if self.incparentsf1 and genotype_1.type != "intercross":
- #self.genotype = genotype_2
genotype = genotype_2
else:
self.incparentsf1 = 0
- #self.genotype = genotype_1
genotype = genotype_1
- #self.samplelist = list(self.genotype.prgy)
self.samplelist = list(genotype.prgy)
-
- return genotype
-
-
-#class DataSets(object):
-# """Builds a list of DataSets"""
-#
-# def __init__(self):
-# self.datasets = list()
-#
-
- #query = """SELECT Name FROM ProbeSetFreeze
- # UNION
- # SELECT Name From PublishFreeze
- # UNION
- # SELECT Name From GenoFreeze"""
- #
- #for result in g.db.execute(query).fetchall():
- # dataset = DataSet(result.Name)
- # self.datasets.append(dataset)
+ return genotype
-#ds = DataSets()
-#print("[orange] ds:", ds.datasets)
class DataSet(object):
"""
@@ -509,13 +417,14 @@ class DataSet(object):
self.shortname = None
self.fullname = None
self.type = None
+ self.data_scale = None #ZS: For example log2
self.setup()
self.check_confidentiality()
self.retrieve_other_names()
-
+
self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype
if get_samplelist == True:
self.group.get_samplelist()
@@ -525,32 +434,11 @@ class DataSet(object):
def get_desc(self):
"""Gets overridden later, at least for Temp...used by trait's get_given_name"""
return None
-
- #@staticmethod
- #def get_by_trait_id(trait_id):
- # """Gets the dataset object given the trait id"""
- #
- #
- #
- # name = g.db.execute(""" SELECT
- #
- # """)
- #
- # return DataSet(name)
# Delete this eventually
@property
def riset():
Weve_Renamed_This_As_Group
-
-
- #@property
- #def group(self):
- # if not self._group:
- # self.get_group()
- #
- # return self._group
-
def retrieve_other_names(self):
"""
@@ -560,7 +448,7 @@ class DataSet(object):
This is not meant to retrieve the data set info if no name at all is passed.
"""
-
+
try:
if self.type == "ProbeSet":
query_args = tuple(escape(x) for x in (
@@ -569,8 +457,8 @@ class DataSet(object):
self.name,
self.name))
- self.id, self.name, self.fullname, self.shortname, self.tissue = g.db.execute("""
- SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, Tissue.Name
+ self.id, self.name, self.fullname, self.shortname, self.data_scale, self.tissue = g.db.execute("""
+ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, Tissue.Name
FROM ProbeSetFreeze, ProbeFreeze, Tissue
WHERE ProbeSetFreeze.public > %s AND
ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
@@ -596,17 +484,17 @@ class DataSet(object):
except TypeError:
print("Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
-
+
def get_trait_data(self, sample_list=None):
if sample_list:
self.samplelist = sample_list
else:
self.samplelist = self.group.samplelist
-
+
if self.group.parlist != None and self.group.f1list != None:
if (self.group.parlist + self.group.f1list) in self.samplelist:
self.samplelist += self.group.parlist + self.group.f1list
-
+
query = """
SELECT Strain.Name, Strain.Id FROM Strain, Species
WHERE Strain.Name IN {}
@@ -623,21 +511,6 @@ class DataSet(object):
number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
trait_sample_data = []
for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
-
- #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
- #tempTable = None
- #if GeneId and db.type == "ProbeSet":
- # if method == "3":
- # tempTable = self.getTempLiteratureTable(species=species,
- # input_species_geneid=GeneId,
- # returnNumber=returnNumber)
- #
- # if method == "4" or method == "5":
- # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
- # TissueProbeSetFreezeId=tissueProbeSetFreezeId,
- # method=method,
- # returnNumber=returnNumber)
-
if self.type == "Publish":
dataset_type = "Phenotype"
else:
@@ -658,7 +531,7 @@ class DataSet(object):
left join {}Data as T{} on T{}.Id = {}XRef.DataId
and T{}.StrainId={}\n
""".format(*mescape(self.type, item, item, self.type, item, item))
-
+
if self.type == "Publish":
query += """
WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
@@ -675,16 +548,16 @@ class DataSet(object):
order by {}.Id
""".format(*mescape(self.type, self.type, self.type, self.type,
self.name, dataset_type, self.type, self.type, dataset_type))
-
+
#print("trait data query: ", query)
-
+
results = g.db.execute(query).fetchall()
#print("query results:", results)
trait_sample_data.append(results)
trait_count = len(trait_sample_data[0])
self.trait_data = collections.defaultdict(list)
-
+
# put all of the separate data together into a dictionary where the keys are
# trait names and values are lists of sample values
for trait_counter in range(trait_count):
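The loop above issues one LEFT JOIN per sample id, and MySQL caps a join at 61 tables, hence the roughly 50-id chunks. The chunk arithmetic standalone (divide_into_chunks splitting a list into n roughly equal parts is the assumption from this call site; the float() is deliberate, since under Python 2 the integer division in the hunk above can undercount chunks and overshoot the 61-table cap):

import math

def divide_into_chunks(items, number_chunks):
    # Assumed behavior of utility.chunks.divide_into_chunks.
    step = int(math.ceil(len(items) / float(number_chunks)))
    for i in range(0, len(items), step):
        yield items[i:i + step]

sample_ids = list(range(1, 124))   # e.g. 123 samples
chunk_size = 50
number_chunks = int(math.ceil(len(sample_ids) / float(chunk_size)))   # 3
for step_ids in divide_into_chunks(sample_ids, number_chunks):
    print(len(step_ids))           # 41, 41, 41: comfortably under 61 joins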
@@ -697,9 +570,9 @@ class PhenotypeDataSet(DataSet):
DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
def setup(self):
-
+
#print("IS A PHENOTYPEDATASET")
-
+
# Fields in the database table
self.search_fields = ['Phenotype.Post_publication_description',
'Phenotype.Pre_publication_description',
@@ -770,26 +643,26 @@ class PhenotypeDataSet(DataSet):
def get_trait_info(self, trait_list, species = ''):
for this_trait in trait_list:
-
+
if not this_trait.haveinfo:
this_trait.retrieve_info(get_qtl_info=True)
description = this_trait.post_publication_description
-
+
#If the dataset is confidential and the user has access to confidential
#phenotype traits, then display the pre-publication description instead
#of the post-publication description
if this_trait.confidential:
this_trait.description_display = ""
continue # for now
-
+
if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
privilege=self.privilege,
userName=self.userName,
authorized_users=this_trait.authorized_users):
-
+
description = this_trait.pre_publication_description
-
+
if len(description) > 0:
this_trait.description_display = description.strip()
else:
@@ -834,7 +707,7 @@ class PhenotypeDataSet(DataSet):
this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb))
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -892,7 +765,7 @@ class GenotypeDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
-
+
def get_trait_list(self):
query = """
select Geno.Name
@@ -926,7 +799,7 @@ class GenotypeDataSet(DataSet):
this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) )
this_trait.location_value = trait_location_value
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -1018,7 +891,7 @@ class MrnaAssayDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
-
+
def get_trait_list_1(self):
query = """
select ProbeSet.Name
@@ -1027,86 +900,14 @@ class MrnaAssayDataSet(DataSet):
and ProbeSetFreezeId = {}
""".format(escape(str(self.id)))
results = g.db.execute(query).fetchall()
- #print("After get_trait_list query")
trait_data = {}
for trait in results:
- #print("Retrieving sample_data for ", trait[0])
trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
- #print("After retrieve_sample_data")
return trait_data
-
- #def get_trait_data(self):
- # self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
- # query = """
- # SELECT Strain.Name, Strain.Id FROM Strain, Species
- # WHERE Strain.Name IN {}
- # and Strain.SpeciesId=Species.Id
- # and Species.name = '{}'
- # """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- # results = dict(g.db.execute(query).fetchall())
- # sample_ids = [results[item] for item in self.samplelist]
- #
- # # MySQL limits the number of tables that can be used in a join to 61,
- # # so we break the sample ids into smaller chunks
- # # Postgres doesn't have that limit, so we can get rid of this after we transition
- # chunk_size = 50
- # number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
- # trait_sample_data = []
- # for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
- #
- # #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
- # #tempTable = None
- # #if GeneId and db.type == "ProbeSet":
- # # if method == "3":
- # # tempTable = self.getTempLiteratureTable(species=species,
- # # input_species_geneid=GeneId,
- # # returnNumber=returnNumber)
- # #
- # # if method == "4" or method == "5":
- # # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
- # # TissueProbeSetFreezeId=tissueProbeSetFreezeId,
- # # method=method,
- # # returnNumber=returnNumber)
- #
- # temp = ['T%s.value' % item for item in sample_ids_step]
- # query = "SELECT {}.Name,".format(escape(self.type))
- # data_start_pos = 1
- # query += string.join(temp, ', ')
- # query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
- # self.type,
- # self.type))
- #
- # for item in sample_ids_step:
- # query += """
- # left join {}Data as T{} on T{}.Id = {}XRef.DataId
- # and T{}.StrainId={}\n
- # """.format(*mescape(self.type, item, item, self.type, item, item))
- #
- # query += """
- # WHERE {}XRef.{}FreezeId = {}Freeze.Id
- # and {}Freeze.Name = '{}'
- # and {}.Id = {}XRef.{}Id
- # order by {}.Id
- # """.format(*mescape(self.type, self.type, self.type, self.type,
- # self.name, self.type, self.type, self.type, self.type))
- # results = g.db.execute(query).fetchall()
- # trait_sample_data.append(results)
- #
- # trait_count = len(trait_sample_data[0])
- # self.trait_data = collections.defaultdict(list)
- #
- # # put all of the separate data together into a dictionary where the keys are
- # # trait names and values are lists of sample values
- # for trait_counter in range(trait_count):
- # trait_name = trait_sample_data[0][trait_counter][0]
- # for chunk_counter in range(int(number_chunks)):
- # self.trait_data[trait_name] += (
- # trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
-
def get_trait_info(self, trait_list=None, species=''):
- # Note: setting trait_list to [] is probably not a great idea.
+ # Note: setting trait_list to [] is probably not a great idea.
if not trait_list:
trait_list = []
@@ -1169,7 +970,7 @@ class MrnaAssayDataSet(DataSet):
#print("query is:", pf(query))
result = g.db.execute(query).fetchone()
-
+
mean = result[0] if result else 0
if mean:
@@ -1190,28 +991,15 @@ class MrnaAssayDataSet(DataSet):
Geno.SpeciesId = Species.Id
""".format(species, this_trait.locus)
result = g.db.execute(query).fetchone()
-
+
if result:
- #if result[0] and result[1]:
- # lrs_chr = result[0]
- # lrs_mb = result[1]
lrs_chr, lrs_mb = result
#XZ: LRS_location_value is used for sorting
lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb)
-
- #try:
- # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb)
- #except:
- # if lrs_chr.upper() == 'X':
- # lrs_location_value = 20*1000 + float(lrs_mb)
- # else:
- # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 +
- # float(lrs_mb))
-
this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = this_trait.lrs
this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
-
+
def convert_location_to_value(self, chromosome, mb):
try:
@@ -1222,7 +1010,7 @@ class MrnaAssayDataSet(DataSet):
else:
location_value = (ord(str(chromosome).upper()[0])*1000 +
float(mb))
-
+
return location_value
def get_sequence(self):
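convert_location_to_value() flattens (chromosome, Mb) into one sortable number. The try branch is cut off by the hunk above, but the deleted comment block a few hunks up preserves the logic: numeric chromosomes become chr*1000 + Mb, 'X' becomes 20*1000 + Mb, and any other letter its ASCII ordinal*1000 + Mb. A standalone reconstruction (self dropped, bare except narrowed to ValueError):

def convert_location_to_value(chromosome, mb):
    try:
        value = int(chromosome) * 1000 + float(mb)
    except ValueError:
        if str(chromosome).upper() == 'X':
            value = 20 * 1000 + float(mb)
        else:
            value = ord(str(chromosome).upper()[0]) * 1000 + float(mb)
    return value

convert_location_to_value('5', 10.5)   # 5010.5
convert_location_to_value('X', 3.2)    # 20003.2
convert_location_to_value('Y', 1.0)    # 89001.0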
@@ -1239,7 +1027,7 @@ class MrnaAssayDataSet(DataSet):
""" % (escape(self.name), escape(self.dataset.name))
results = g.db.execute(query).fetchone()
return results[0]
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -1260,8 +1048,8 @@ class MrnaAssayDataSet(DataSet):
results = g.db.execute(query).fetchall()
#print("RETRIEVED RESULTS HERE:", results)
return results
-
-
+
+
def retrieve_genes(self, column_name):
query = """
select ProbeSet.Name, ProbeSet.%s
@@ -1270,37 +1058,8 @@ class MrnaAssayDataSet(DataSet):
ProbeSetXRef.ProbeSetId=ProbeSet.Id;
""" % (column_name, escape(str(self.id)))
results = g.db.execute(query).fetchall()
-
- return dict(results)
- #def retrieve_gene_symbols(self):
- # query = """
- # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId
- # from ProbeSet,ProbeSetXRef
- # where ProbeSetXRef.ProbeSetFreezeId = %s and
- # ProbeSetXRef.ProbeSetId=ProbeSet.Id;
- # """ % (self.id)
- # results = g.db.execute(query).fetchall()
- # symbol_dict = {}
- # for item in results:
- # symbol_dict[item[0]] = item[1]
- # return symbol_dict
- #
- #def retrieve_gene_ids(self):
- # query = """
- # select ProbeSet.Name, ProbeSet.GeneId
- # from ProbeSet,ProbeSetXRef
- # where ProbeSetXRef.ProbeSetFreezeId = %s and
- # ProbeSetXRef.ProbeSetId=ProbeSet.Id;
- # """ % (self.id)
- # return process_and_run_query(query)
- # results = g.db.execute(query).fetchall()
- # symbol_dict = {}
- # for item in results:
- # symbol_dict[item[0]] = item[1]
- # return symbol_dict
-
-
+ return dict(results)
class TempDataSet(DataSet):
@@ -1322,8 +1081,8 @@ class TempDataSet(DataSet):
self.id = 1
self.fullname = 'Temporary Storage'
self.shortname = 'Temp'
-
-
+
+
@staticmethod
def handle_pca(desc):
if 'PCA' in desc:
@@ -1332,13 +1091,13 @@ class TempDataSet(DataSet):
else:
desc = desc[:desc.index('entered')].strip()
return desc
-
+
def get_desc(self):
g.db.execute('SELECT description FROM Temp WHERE Name=%s', self.name)
desc = g.db.fetchone()[0]
desc = self.handle_pca(desc)
- return desc
-
+ return desc
+
def get_group(self):
self.cursor.execute("""
SELECT
@@ -1351,7 +1110,7 @@ class TempDataSet(DataSet):
""", self.name)
self.group, self.group_id = self.cursor.fetchone()
#return self.group
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -1365,7 +1124,7 @@ class TempDataSet(DataSet):
Order BY
Strain.Name
""" % escape(trait.name)
-
+
results = g.db.execute(query).fetchall()
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
new file mode 100755
index 00000000..0358bcbf
--- /dev/null
+++ b/wqflask/base/webqtlConfig.py
@@ -0,0 +1,79 @@
+#########################################
+# Environment Variables - public
+#
+# Note: much of this needs to be handled by the settings/environment
+# scripts. But rather than migrating everything in one go, we'll
+# take it a step at a time. First the hard coded paths get replaced
+# with those in utility/tools.py
+#
+#########################################
+
+from utility.tools import mk_dir, assert_dir, flat_files, TEMPDIR
+
+#Debug Level
+#1 for debug, mod python will reload import each time
+DEBUG = 1
+
+#USER privilege
+USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4}
+
+#minimum number of informative strains
+KMININFORMATIVE = 5
+
+#maximum number of traits for interval mapping
+MULTIPLEMAPPINGLIMIT = 11
+
+#maximum number of traits for correlation
+MAXCORR = 100
+
+#Daily download limit from one IP
+DAILYMAXIMUM = 1000
+
+#maximum LRS value
+MAXLRS = 460.0
+
+#temporary data life span
+MAXLIFE = 86400
+
+#MINIMUM Database public value
+PUBLICTHRESH = 0
+
+#NCBI address
+NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s"
+UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s"
+GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
+OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
+UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"
+HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=homologene&Cmd=DetailsSearch&Term=%s"
+PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
+UCSC_POS = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=mammal&org=%s&db=%s&position=chr%s:%s-%s&pix=800&Submit=submit"
+UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
+UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
+UCSC_GENOME = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=http://web2qtl.utmem.edu:88/snp/chr%s"
+ENSEMBLE_BLAT = 'http://www.ensembl.org/Mus_musculus/featureview?type=AffyProbe&id=%s'
+DBSNP = 'http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s'
+UCSC_RUDI_TRACK_URL = " http://genome.cse.ucsc.edu/cgi-bin/hgTracks?org=%s&db=%s&hgt.customText=http://gbic.biol.rug.nl/~ralberts/tracks/%s/%s"
+GENOMEBROWSER_URL="http://ucscbrowser.genenetwork.org/cgi-bin/hgTracks?clade=mammal&org=Mouse&db=mm9&position=%s&hgt.suggest=&pix=800&Submit=submit"
+ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?species=Mus_musculus;idx=Transcript;end=1;q=%s"
+
+# The following paths are no longer in use!
+# HTMLPATH is replaced by GENODIR
+# IMGDIR is replaced by GENERATED_IMAGE_DIR
+
+# Temporary storage:
+TMPDIR = mk_dir(TEMPDIR+'/gn2/')
+CACHEDIR = mk_dir(TEMPDIR+'/cache/')
+# We can no longer write into the git tree:
+GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'/generated/')
+GENERATED_TEXT_DIR = mk_dir(TMPDIR+'/generated_text/')
+
+# Flat file directories
+GENODIR = flat_files('genotype')+'/'
+JSON_GENODIR = assert_dir(GENODIR+'json/')
+
+PORTADDR = "http://50.16.251.170"
+
+INFOPAGEHREF = '/dbdoc/%s.html'
+CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
+SCRIPTFILE = 'main.py'
+
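The new config file leans on three tools helpers: flat_files() for read-only data roots, mk_dir() for writable scratch directories, and assert_dir() for directories that must already exist. A minimal sketch of the latter two, inferred from the call sites above (the real implementations live in wqflask/utility/tools.py):

import os

def mk_dir(path):
    # Create the directory if needed and return it; TMPDIR, CACHEDIR and the
    # generated image/text directories above are all set up this way.
    if not os.path.isdir(path):
        os.makedirs(path)
    return path

def assert_dir(path):
    # Return the path only if it already exists; JSON_GENODIR above must be
    # provisioned out of band, so a miss should fail hard.
    if not os.path.isdir(path):
        raise IOError("Directory not found: " + path)
    return path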
diff --git a/wqflask/base/webqtlFormData.py b/wqflask/base/webqtlFormData.py
index 44fdcc3f..10251756 100755
--- a/wqflask/base/webqtlFormData.py
+++ b/wqflask/base/webqtlFormData.py
@@ -157,7 +157,7 @@ class webqtlFormData(object):
self.genotype_1 = reaper.Dataset()
- full_filename = os.path.join(webqtlConfig.GENODIR, self.group + '.geno')
+ full_filename = locate(self.group + '.geno','genotype')
# reaper barfs on unicode filenames, so here we ensure it's a string
full_filename = str(full_filename)
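The same locate() swap closes out the merge here in webqtlFormData.py. The detail worth keeping when reusing the pattern is the str() wrapper: qtlreaper's C extension rejects unicode filenames, so the looked-up path is coerced before reaper sees it. Schematically (the group name is illustrative):

full_filename = str(locate('BXD.geno', 'genotype'))   # plain str for reaper
genotype_1 = reaper.Dataset()
genotype_1.read(full_filename)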