aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base')
-rw-r--r--wqflask/base/data_set.py228
-rw-r--r--wqflask/base/species.py27
-rw-r--r--wqflask/base/trait.py120
-rw-r--r--wqflask/base/webqtlCaseData.py9
-rw-r--r--wqflask/base/webqtlConfig.py25
5 files changed, 300 insertions, 109 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 4a422ee4..1f99df49 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -26,6 +26,7 @@ import collections
import codecs
import json
+import requests
import gzip
import cPickle as pickle
import itertools
@@ -43,8 +44,11 @@ from db import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
from utility import chunks
+from utility import gen_geno_ob
from utility.tools import locate, locate_ignore_error, flat_files
+from wqflask.api import gen_menu
+
from maintenance import get_group_samplelists
from MySQLdb import escape_string as escape
@@ -52,7 +56,7 @@ from pprint import pformat as pf
from db.gn_server import menu_main
from db.call import fetchall,fetchone,fetch1
-from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists
+from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL
from utility.logger import getLogger
logger = getLogger(__name__ )
@@ -63,7 +67,6 @@ DS_NAME_MAP = {}
def create_dataset(dataset_name, dataset_type = None, get_samplelist = True, group_name = None):
if not dataset_type:
dataset_type = Dataset_Getter(dataset_name)
- logger.debug("dataset_type", dataset_type)
dataset_ob = DS_NAME_MAP[dataset_type]
dataset_class = globals()[dataset_ob]
@@ -90,12 +93,9 @@ Publish or ProbeSet. E.g.
"""
self.datasets = {}
- if USE_GN_SERVER:
- data = menu_main()
- else:
- file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
- with open(file_name, 'r') as fh:
- data = json.load(fh)
+ data = json.loads(requests.get(GN2_BASE_URL + "/api/v_pre1/gen_dropdown").content)
+ #data = gen_menu.gen_dropdown_json()
+
for species in data['datasets']:
for group in data['datasets'][species]:
@@ -109,11 +109,70 @@ Publish or ProbeSet. E.g.
else:
new_type = "ProbeSet"
self.datasets[short_dataset_name] = new_type
+
# Set LOG_LEVEL_DEBUG=5 to see the following:
logger.debugf(5, "datasets",self.datasets)
def __call__(self, name):
- return self.datasets[name]
+        """Return the dataset type for ``name``: "ProbeSet", "Publish" or "Geno".
+
+        Names already present in ``self.datasets`` are answered from that
+        cache.  Any other name is looked up in the database (mRNA expression
+        first, then phenotype, then genotype tables); the first table that
+        returns a row decides the type, which is then cached.
+
+        Returns None when none of the lookups returns a row.
+        """
+        if name in self.datasets:
+            return self.datasets[name]
+
+        group_name = name.replace("Publish", "")
+
+        # (query, bound value, resulting dataset type), tried in this order.
+        # The value is passed as a bound parameter rather than formatted into
+        # the SQL text, so quotes in a caller-supplied name cannot change the
+        # statement.
+        lookups = (
+            ("""SELECT ProbeSetFreeze.Id
+                FROM ProbeSetFreeze
+                WHERE ProbeSetFreeze.Name = %s""",
+             name, "ProbeSet"),
+            ("""SELECT InfoFiles.GN_AccesionId
+                FROM InfoFiles, PublishFreeze, InbredSet
+                WHERE InbredSet.Name = %s AND
+                      PublishFreeze.InbredSetId = InbredSet.Id AND
+                      InfoFiles.InfoPageName = PublishFreeze.Name""",
+             group_name, "Publish"),
+            #ZS: For when there isn't an InfoFiles ID; not sure if this and the preceding query are both necessary
+            ("""SELECT PublishFreeze.Name
+                FROM PublishFreeze, InbredSet
+                WHERE InbredSet.Name = %s AND
+                      PublishFreeze.InbredSetId = InbredSet.Id""",
+             group_name, "Publish"),
+            # NOTE(review): the selected column is spelled GenoFreezeId, while
+            # the ProbeSetFreeze lookup above selects <table>.Id -- confirm the
+            # column exists in GenoFreeze.
+            ("""SELECT GenoFreezeId
+                FROM GenoFreeze
+                WHERE GenoFreeze.Name = %s""",
+             name, "Geno"),
+        )
+
+        for query, value, dataset_type in lookups:
+            if g.db.execute(query, (value,)).fetchall():
+                self.datasets[name] = dataset_type
+                return dataset_type
+
+        #ZS: It shouldn't ever reach this
+        return None
# Do the intensive work at startup one time only
Dataset_Getter = Dataset_Types()
@@ -170,31 +229,25 @@ class Markers(object):
def __init__(self, name):
json_data_fh = open(locate(name + ".json",'genotype/json'))
- try:
- markers = []
- with open(locate(name + "_snps.txt", 'r')) as bimbam_fh:
+ markers = []
+        with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name), 'r') as bimbam_fh:
+            # Sniff the delimiter from ONE line.  Calling readline() in every
+            # branch would test each candidate against a different line and
+            # silently drop up to three marker rows.
+            first_line = bimbam_fh.readline()
+            if len(first_line.split(", ")) > 2:
+                delimiter = ", "
+            elif len(first_line.split(",")) > 2:
+                delimiter = ","
+            elif len(first_line.split("\t")) > 2:
+                delimiter = "\t"
+            else:
+                delimiter = " "
+            # NOTE(review): the sniffed first line is not parsed as a marker
+            # (unchanged from before).  If these files have no header row,
+            # add bimbam_fh.seek(0) here so the first marker is kept.
+            for line in bimbam_fh:
marker = {}
- if len(bimbam_fh[0].split(", ")) > 2:
- delimiter = ", "
- elif len(bimbam_fh[0].split(",")) > 2:
- delimiter = ","
- elif len(bimbam_fh[0].split("\t")) > 2:
- delimiter = "\t"
- else:
- delimiter = " "
- for line in bimbam_fh:
- marker['name'] = line.split(delimiter)[0]
- marker['Mb']
- marker['chr'] = line.split(delimiter)[2]
- marker['cM']
- markers.append(marker)
- #try:
- # markers = json.load(json_data_fh)
- except:
- markers = []
+ marker['name'] = line.split(delimiter)[0].rstrip()
+ marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000
+ marker['chr'] = line.split(delimiter)[2].rstrip()
+ markers.append(marker)
for marker in markers:
- if (marker['chr'] != "X") and (marker['chr'] != "Y"):
+ if (marker['chr'] != "X") and (marker['chr'] != "Y") and (marker['chr'] != "M"):
marker['chr'] = int(marker['chr'])
marker['Mb'] = float(marker['Mb'])
@@ -282,7 +335,6 @@ class DatasetGroup(object):
"""
def __init__(self, dataset, name=None):
"""This sets self.group and self.group_id"""
- #logger.debug("DATASET NAME2:", dataset.name)
if name == None:
self.name, self.id, self.genetic_type = fetchone(dataset.query_for_group)
else:
@@ -294,7 +346,6 @@ class DatasetGroup(object):
self.parlist = None
self.get_f1_parent_strains()
- self.accession_id = self.get_accession_id()
self.mapping_id, self.mapping_names = self.get_mapping_methods()
self.species = webqtlDatabaseFunction.retrieve_species(self.name)
@@ -304,27 +355,15 @@ class DatasetGroup(object):
self._datasets = None
self.genofile = None
- def get_accession_id(self):
- results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
- InbredSet.Name = %s and
- PublishFreeze.InbredSetId = InbredSet.Id and
- InfoFiles.InfoPageName = PublishFreeze.Name and
- PublishFreeze.public > 0 and
- PublishFreeze.confidentiality < 1 order by
- PublishFreeze.CreateTime desc""", (self.name)).fetchone()
-
- if results != None:
- return str(results[0])
- else:
- return "None"
-
def get_mapping_methods(self):
mapping_id = g.db.execute("select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0]
if mapping_id == "1":
- mapping_names = ["QTLReaper", "PYLMM", "R/qtl"]
+ mapping_names = ["GEMMA", "QTLReaper", "R/qtl"]
elif mapping_id == "2":
mapping_names = ["GEMMA"]
+ elif mapping_id == "3":
+ mapping_names = ["R/qtl"]
elif mapping_id == "4":
mapping_names = ["GEMMA", "PLINK"]
else:
@@ -333,8 +372,6 @@ class DatasetGroup(object):
return mapping_id, mapping_names
def get_markers(self):
- logger.debug("self.species is:", self.species)
-
def check_plink_gemma():
if flat_file_exists("mapping"):
MAPPING_PATH = flat_files("mapping")+"/"
@@ -364,30 +401,32 @@ class DatasetGroup(object):
if maternal and paternal:
self.parlist = [maternal, paternal]
+    def get_genofiles(self):
+        """Return the 'genofile' entry of this group's genotype JSON file.
+
+        The file is looked up as ``<webqtlConfig.GENODIR>/<group name>.json``.
+        Returns None when that file cannot be opened.  Errors while parsing
+        the JSON, or a missing 'genofile' key, still propagate to the caller.
+        """
+        jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name)
+        try:
+            f = open(jsonfile)
+        except IOError:
+            # A group without a JSON descriptor is an expected case.
+            return None
+        # Close the handle even if json.load raises.
+        with f:
+            jsondata = json.load(f)
+        return jsondata['genofile']
+
def get_samplelist(self):
result = None
- key = "samplelist:v2:" + self.name
+ key = "samplelist:v3:" + self.name
if USE_REDIS:
result = Redis.get(key)
if result is not None:
- #logger.debug("Sample List Cache hit!!!")
- #logger.debug("Before unjsonifying {}: {}".format(type(result), result))
self.samplelist = json.loads(result)
- #logger.debug(" type: ", type(self.samplelist))
- #logger.debug(" self.samplelist: ", self.samplelist)
else:
logger.debug("Cache not hit")
genotype_fn = locate_ignore_error(self.name+".geno",'genotype')
- mapping_fn = locate_ignore_error(self.name+".fam",'mapping')
- if mapping_fn:
- self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn)
- elif genotype_fn:
+ if genotype_fn:
self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn)
else:
self.samplelist = None
- logger.debug("Sample list: ",self.samplelist)
+
if USE_REDIS:
Redis.set(key, json.dumps(self.samplelist))
Redis.expire(key, 60*5)
@@ -398,19 +437,27 @@ class DatasetGroup(object):
[result.extend(l) for l in lists if l]
return result
- def read_genotype_file(self):
+ def read_genotype_file(self, use_reaper=False):
'''Read genotype from .geno file instead of database'''
#genotype_1 is Dataset Object without parents and f1
#genotype_2 is Dataset Object with parents and f1 (not for intercross)
- genotype_1 = reaper.Dataset()
+ #genotype_1 = reaper.Dataset()
# reaper barfs on unicode filenames, so here we ensure it's a string
if self.genofile:
- full_filename = str(locate(self.genofile, 'genotype'))
+ if "RData" in self.genofile: #ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to other file types like RData
+ full_filename = str(locate(self.genofile.split(".")[0] + ".geno", 'genotype'))
+ else:
+ full_filename = str(locate(self.genofile, 'genotype'))
else:
full_filename = str(locate(self.name + '.geno', 'genotype'))
- genotype_1.read(full_filename)
+
+ if use_reaper:
+ genotype_1 = reaper.Dataset()
+ genotype_1.read(full_filename)
+ else:
+ genotype_1 = gen_geno_ob.genotype(full_filename)
if genotype_1.type == "group" and self.parlist:
genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1)
@@ -440,7 +487,8 @@ def datasets(group_name, this_group = None):
WHERE PublishFreeze.InbredSetId = InbredSet.Id
and InbredSet.Name = '%s'
and PublishFreeze.public > %s
- and PublishFreeze.confidentiality < 1)
+ and PublishFreeze.confidentiality < 1
+ ORDER BY PublishFreeze.Id ASC)
UNION
(SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name
FROM GenoFreeze, InbredSet
@@ -457,17 +505,28 @@ def datasets(group_name, this_group = None):
and InbredSet.Name like %s
and ProbeSetFreeze.public > %s
and ProbeSetFreeze.confidentiality < 1
- ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId)
+ ORDER BY Tissue.Name, ProbeSetFreeze.OrderList DESC)
''' % (group_name, webqtlConfig.PUBLICTHRESH,
group_name, webqtlConfig.PUBLICTHRESH,
"'" + group_name + "'", webqtlConfig.PUBLICTHRESH))
- for dataset_item in the_results:
+ sorted_results = sorted(the_results, key=lambda kv: kv[0])
+
+ pheno_inserted = False #ZS: This is kind of awkward, but need to ensure Phenotypes show up before Genotypes in dropdown
+ geno_inserted = False
+ for dataset_item in sorted_results:
tissue_name = dataset_item[0]
dataset = dataset_item[1]
dataset_short = dataset_item[2]
if tissue_name in ['#PublishFreeze', '#GenoFreeze']:
- dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)]))
+ if tissue_name == '#PublishFreeze' and (dataset_short == group_name + 'Publish'):
+ dataset_menu.insert(0, dict(tissue=None, datasets=[(dataset, dataset_short)]))
+ pheno_inserted = True
+ elif pheno_inserted and tissue_name == '#GenoFreeze':
+ dataset_menu.insert(1, dict(tissue=None, datasets=[(dataset, dataset_short)]))
+ geno_inserted = True
+ else:
+ dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)]))
else:
tissue_already_exists = False
for i, tissue_dict in enumerate(dataset_menu):
@@ -512,11 +571,12 @@ class DataSet(object):
self.setup()
if self.type == "Temp": #Need to supply group name as input if temp trait
- self.group = DatasetGroup(self, group_name) # sets self.group and self.group_id and gets genotype
+ self.group = DatasetGroup(self, name=group_name) # sets self.group and self.group_id and gets genotype
else:
self.check_confidentiality()
self.retrieve_other_names()
self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype
+ self.accession_id = self.get_accession_id()
if get_samplelist == True:
self.group.get_samplelist()
self.species = species.TheSpecies(self)
@@ -531,6 +591,31 @@ class DataSet(object):
def riset():
Weve_Renamed_This_As_Group
+ def get_accession_id(self):
+ """Return the InfoFiles GN accession id for this dataset as a string.
+
+ Only datasets of type "Publish" and "Geno" are looked up, using the
+ name of ``self.group``; among the public, non-confidential freezes of
+ that group the most recently created one is used.  For any other
+ dataset type, or when the query returns no row, the literal string
+ "None" (not the None object) is returned.
+ """
+ if self.type == "Publish":
+ results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
+ InbredSet.Name = %s and
+ PublishFreeze.InbredSetId = InbredSet.Id and
+ InfoFiles.InfoPageName = PublishFreeze.Name and
+ PublishFreeze.public > 0 and
+ PublishFreeze.confidentiality < 1 order by
+ PublishFreeze.CreateTime desc""", (self.group.name)).fetchone()
+ elif self.type == "Geno":
+ results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where
+ InbredSet.Name = %s and
+ GenoFreeze.InbredSetId = InbredSet.Id and
+ InfoFiles.InfoPageName = GenoFreeze.ShortName and
+ GenoFreeze.public > 0 and
+ GenoFreeze.confidentiality < 1 order by
+ GenoFreeze.CreateTime desc""", (self.group.name)).fetchone()
+ else:
+ # Other dataset types have no accession lookup here.
+ results = None
+
+ # fetchone() yields None when no row matched; callers get the string "None".
+ if results != None:
+ return str(results[0])
+ else:
+ return "None"
+
def retrieve_other_names(self):
"""This method fetches the the dataset names in search_result.
@@ -677,6 +762,7 @@ class PhenotypeDataSet(DataSet):
'Phenotype.Pre_publication_description',
'Phenotype.Pre_publication_abbreviation',
'Phenotype.Post_publication_abbreviation',
+ 'PublishXRef.mean',
'Phenotype.Lab_code',
'Publication.PubMed_ID',
'Publication.Abstract',
@@ -685,13 +771,14 @@ class PhenotypeDataSet(DataSet):
'PublishXRef.Id']
# Figure out what display_fields is
- self.display_fields = ['name',
+ self.display_fields = ['name', 'group_code',
'pubmed_id',
'pre_publication_description',
'post_publication_description',
'original_description',
'pre_publication_abbreviation',
'post_publication_abbreviation',
+ 'mean',
'lab_code',
'submitter', 'owner',
'authorized_users',
@@ -906,6 +993,7 @@ class MrnaAssayDataSet(DataSet):
'blatseq', 'targetseq',
'chipid', 'comments',
'strand_probe', 'strand_gene',
+ 'proteinid', 'uniprotid',
'probe_set_target_region',
'probe_set_specificity',
'probe_set_blat_score',
diff --git a/wqflask/base/species.py b/wqflask/base/species.py
index 4ac2213c..6d99af65 100644
--- a/wqflask/base/species.py
+++ b/wqflask/base/species.py
@@ -14,10 +14,13 @@ from utility.logger import getLogger
logger = getLogger(__name__ )
class TheSpecies(object):
- def __init__(self, dataset):
- self.dataset = dataset
- #print("self.dataset is:", pf(self.dataset.__dict__))
- self.chromosomes = Chromosomes(self.dataset)
+ def __init__(self, dataset=None, species_name=None):
+ if species_name != None:
+ self.name = species_name
+ self.chromosomes = Chromosomes(species=self.name)
+ else:
+ self.dataset = dataset
+ self.chromosomes = Chromosomes(dataset=self.dataset)
class IndChromosome(object):
def __init__(self, name, length):
@@ -30,11 +33,21 @@ class IndChromosome(object):
return self.length / 1000000
class Chromosomes(object):
- def __init__(self, dataset):
- self.dataset = dataset
+ def __init__(self, dataset=None, species=None):
self.chromosomes = collections.OrderedDict()
+ if species != None:
+ query = """
+ Select
+ Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
+ where
+ Chr_Length.SpeciesId = Species.SpeciesId AND
+ Species.Name = '%s'
+ Order by OrderId
+ """ % species.capitalize()
+ else:
+ self.dataset = dataset
- query = """
+ query = """
Select
Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
where
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 3daf9ea9..e454c593 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -3,14 +3,18 @@ from __future__ import absolute_import, division, print_function
import string
import resource
import codecs
+import requests
-from htmlgen import HTMLgen2 as HT
+import redis
+Redis = redis.StrictRedis()
from base import webqtlConfig
from base.webqtlCaseData import webqtlCaseData
from base.data_set import create_dataset
from db import webqtlDatabaseFunction
from utility import webqtlUtil
+from utility import hmac
+from utility.tools import GN2_BASE_URL
from wqflask import app
@@ -23,8 +27,6 @@ from flask import Flask, g, request, url_for
from utility.logger import getLogger
logger = getLogger(__name__ )
-from wqflask import user_manager
-
class GeneralTrait(object):
"""
Trait class defines a trait in webqtl, can be either Microarray,
@@ -35,13 +37,15 @@ class GeneralTrait(object):
def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
# xor assertion
assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
+ self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
if kw.get('dataset_name'):
- self.dataset = create_dataset(kw.get('dataset_name'))
- #print(" in GeneralTrait created dataset:", self.dataset)
+ if kw.get('dataset_name') == "Temp":
+ temp_group = self.name.split("_")[2]
+ self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group)
+ else:
+ self.dataset = create_dataset(kw.get('dataset_name'))
else:
self.dataset = kw.get('dataset')
- self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
- #print("THE NAME IS:", self.name)
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
@@ -57,6 +61,7 @@ class GeneralTrait(object):
self.num_overlap = None
self.strand_probe = None
self.symbol = None
+ self.display_name = self.name
self.LRS_score_repr = "N/A"
self.LRS_location_repr = "N/A"
@@ -73,8 +78,8 @@ class GeneralTrait(object):
# So we could add a simple if statement to short-circuit this if necessary
if self.dataset.type != "Temp":
self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info)
- if get_sample_info != False:
- self = retrieve_sample_data(self, self.dataset)
+ if get_sample_info != False:
+ self = retrieve_sample_data(self, self.dataset)
def export_informative(self, include_variance=0):
"""
@@ -117,11 +122,36 @@ class GeneralTrait(object):
@property
def alias_fmt(self):
'''Return a text formatted alias'''
+
+ alias = 'Not available'
if self.alias:
alias = string.replace(self.alias, ";", " ")
alias = string.join(string.split(alias), ", ")
- else:
- alias = 'Not available'
+
+ return alias
+
+ @property
+ def wikidata_alias_fmt(self):
+ '''Return a text formatted alias'''
+
+ alias = 'Not available'
+ if self.symbol:
+ human_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper())
+ mouse_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize())
+ other_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower())
+
+ if human_response and mouse_response and other_response:
+ alias_list = json.loads(human_response.content) + json.loads(mouse_response.content) + json.loads(other_response.content)
+
+ filtered_aliases = []
+ seen = set()
+ for item in alias_list:
+ if item in seen:
+ continue
+ else:
+ filtered_aliases.append(item)
+ seen.add(item)
+ alias = "; ".join(filtered_aliases)
return alias
@@ -154,18 +184,27 @@ def retrieve_sample_data(trait, dataset, samplelist=None):
if samplelist == None:
samplelist = []
- results = dataset.retrieve_sample_data(trait.name)
+ if dataset.type == "Temp":
+ results = Redis.get(trait.name).split()
+ else:
+ results = dataset.retrieve_sample_data(trait.name)
# Todo: is this necessary? If not remove
trait.data.clear()
- all_samples_ordered = dataset.group.all_samples_ordered()
-
if results:
- for item in results:
- name, value, variance, num_cases, name2 = item
- if not samplelist or (samplelist and name in samplelist):
- trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
+ if dataset.type == "Temp":
+ all_samples_ordered = dataset.group.all_samples_ordered()
+ for i, item in enumerate(results):
+ try:
+ trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item))
+ except:
+ pass
+ else:
+ for item in results:
+ name, value, variance, num_cases, name2 = item
+ if not samplelist or (samplelist and name in samplelist):
+ trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
return trait
@app.route("/trait/get_sample_data")
@@ -189,7 +228,8 @@ def get_sample_data():
trait_dict['symbol'] = trait_ob.symbol
trait_dict['location'] = trait_ob.location_repr
elif trait_ob.dataset.type == "Publish":
- trait_dict['pubmed_link'] = trait_ob.pubmed_link
+ if trait_ob.pubmed_id:
+ trait_dict['pubmed_link'] = trait_ob.pubmed_link
trait_dict['pubmed_text'] = trait_ob.pubmed_text
return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }])
@@ -205,6 +245,7 @@ def jsonable(trait):
return dict(name=trait.name,
symbol=trait.symbol,
dataset=dataset.name,
+ dataset_name = dataset.shortname,
description=trait.description_display,
mean=trait.mean,
location=trait.location_repr,
@@ -216,7 +257,9 @@ def jsonable(trait):
if trait.pubmed_id:
return dict(name=trait.name,
dataset=dataset.name,
+ dataset_name = dataset.shortname,
description=trait.description_display,
+ abbreviation=trait.abbreviation,
authors=trait.authors,
pubmed_text=trait.pubmed_text,
pubmed_link=trait.pubmed_link,
@@ -227,7 +270,9 @@ def jsonable(trait):
else:
return dict(name=trait.name,
dataset=dataset.name,
+ dataset_name = dataset.shortname,
description=trait.description_display,
+ abbreviation=trait.abbreviation,
authors=trait.authors,
pubmed_text=trait.pubmed_text,
lrs_score=trait.LRS_score_repr,
@@ -237,6 +282,7 @@ def jsonable(trait):
elif dataset.type == "Geno":
return dict(name=trait.name,
dataset=dataset.name,
+ dataset_name = dataset.shortname,
location=trait.location_repr
)
else:
@@ -258,7 +304,7 @@ def jsonable_table_row(trait, dataset_name, index):
additive = "N/A"
else:
additive = "%.3f" % round(float(trait.additive), 2)
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
index,
'<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
trait.symbol,
@@ -274,7 +320,7 @@ def jsonable_table_row(trait, dataset_name, index):
else:
additive = "%.2f" % round(float(trait.additive), 2)
if trait.pubmed_id:
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
index,
'<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
trait.description_display,
@@ -284,7 +330,7 @@ def jsonable_table_row(trait, dataset_name, index):
trait.LRS_location_repr,
additive]
else:
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
index,
'<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
trait.description_display,
@@ -294,7 +340,7 @@ def jsonable_table_row(trait, dataset_name, index):
trait.LRS_location_repr,
additive]
elif dataset.type == "Geno":
- return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
+ return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
index,
'<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
trait.location_repr]
@@ -307,21 +353,22 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
if dataset.type == 'Publish':
query = """
SELECT
- PublishXRef.Id, Publication.PubMed_ID,
+ PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID,
Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description,
- Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation,
+ Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, PublishXRef.mean,
Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users,
Publication.Authors, Publication.Title, Publication.Abstract,
Publication.Journal, Publication.Volume, Publication.Pages,
Publication.Month, Publication.Year, PublishXRef.Sequence,
Phenotype.Units, PublishXRef.comments
FROM
- PublishXRef, Publication, Phenotype, PublishFreeze
+ PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet
WHERE
PublishXRef.Id = %s AND
Phenotype.Id = PublishXRef.PhenotypeId AND
Publication.Id = PublishXRef.PublicationId AND
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
+ PublishXRef.InbredSetId = InbredSet.Id AND
PublishFreeze.Id = %s
""" % (trait.name, dataset.id)
@@ -371,17 +418,25 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
trait_info = g.db.execute(query,
(string.join(dataset.display_fields,','),
dataset.type, trait.name)).fetchone()
+
if trait_info:
trait.haveinfo = True
#XZ: assign SQL query result to trait attributes.
for i, field in enumerate(dataset.display_fields):
holder = trait_info[i]
+ # if isinstance(trait_info[i], basestring):
+ # logger.debug("HOLDER:", holder)
+ # logger.debug("HOLDER2:", holder.decode(encoding='latin1'))
+ # holder = unicode(trait_info[i], "utf-8", "ignore")
if isinstance(trait_info[i], basestring):
- holder = unicode(trait_info[i], "utf-8", "ignore")
+ holder = holder.encode('latin1')
setattr(trait, field, holder)
if dataset.type == 'Publish':
+ if trait.group_code:
+ trait.display_name = trait.group_code + "_" + str(trait.name)
+
trait.confidential = 0
if trait.pre_publication_description and not trait.pubmed_id:
trait.confidential = 1
@@ -392,6 +447,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
#phenotype traits, then display the pre-publication description instead
#of the post-publication description
if trait.confidential:
+ trait.abbreviation = trait.pre_publication_abbreviation
trait.description_display = trait.pre_publication_description
#if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
@@ -401,11 +457,16 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
#
# description = self.pre_publication_description
else:
+ trait.abbreviation = trait.post_publication_abbreviation
if description:
trait.description_display = description.strip()
else:
trait.description_display = ""
+ trait.abbreviation = unicode(str(trait.abbreviation).strip(codecs.BOM_UTF8), 'utf-8', errors="replace")
+ trait.description_display = unicode(str(trait.description_display).strip(codecs.BOM_UTF8), 'utf-8', errors="replace")
+ trait.authors = unicode(str(trait.authors).strip(codecs.BOM_UTF8), 'utf-8', errors="replace")
+
if not trait.year.isdigit():
trait.pubmed_text = "N/A"
else:
@@ -443,7 +504,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
#LRS and its location
trait.LRS_score_repr = "N/A"
trait.LRS_location_repr = "N/A"
+ trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = ""
if dataset.type == 'ProbeSet' and not trait.cellid:
+ trait.mean = ""
query = """
SELECT
ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
@@ -474,9 +537,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
else:
trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""
- else:
- trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.mean = trait.additive = ""
-
if dataset.type == 'Publish':
query = """
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 24de8dcb..d8487f01 100644
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -49,7 +49,7 @@ class webqtlCaseData(object):
if self.variance != None:
str += " variance=%2.3f" % self.variance
if self.num_cases:
- str += " ndata=%d" % self.num_cases
+ str += " ndata=%s" % self.num_cases
if self.name:
str += " name=%s" % self.name
if self.name2:
@@ -77,3 +77,10 @@ class webqtlCaseData(object):
return "%2.3f" % self.variance
else:
return "x"
+
+ @property
+ def display_num_cases(self):
+ """Return ``num_cases`` formatted with "%s", or "x" when it is None."""
+ if self.num_cases != None:
+ return "%s" % self.num_cases
+ else:
+ return "x"
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 4708bf0a..018d5d54 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -34,6 +34,30 @@ PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=
UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
+GENOMEBROWSER_URL="https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s"
+NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s"
+GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
+OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
+UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"
+HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/homologene/?term=%s"
+GENOTATION_URL = "http://www.genotation.org/Getd2g.pl?gene_list=%s"
+GTEX_URL = "https://www.gtexportal.org/home/gene/%s"
+GENEBRIDGE_URL = "https://www.systems-genetics.org/modules_by_gene/%s?organism=%s"
+GENEMANIA_URL = "https://genemania.org/search/%s/%s"
+UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s"
+BIOGPS_URL = "http://biogps.org/?org=%s#goto=genereport&id=%s"
+STRING_URL = "http://string-db.org/newstring_cgi/show_network_section.pl?identifier=%s"
+PANTHER_URL = "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue=%s"
+GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s"
+ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s"
+EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s"
+WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s"
+ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s"
+DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s'
+PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s"
+OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s"
+UNIPROT_URL = "https://www.uniprot.org/uniprot/%s"
+RGD_URL = "https://rgd.mcw.edu/rgdweb/elasticResults.html?term=%s&category=Gene&species=%s"
# Temporary storage (note that this TMPDIR can be set as an
# environment variable - use utility.tools.TEMPDIR when you
@@ -68,4 +92,3 @@ if not valid_path(JSON_GENODIR):
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
-SCRIPTFILE = 'main.py'