diff options
Diffstat (limited to 'wqflask/base')
-rw-r--r-- | wqflask/base/data_set.py | 228 | ||||
-rw-r--r-- | wqflask/base/species.py | 27 | ||||
-rw-r--r-- | wqflask/base/trait.py | 120 | ||||
-rw-r--r-- | wqflask/base/webqtlCaseData.py | 9 | ||||
-rw-r--r-- | wqflask/base/webqtlConfig.py | 25 |
5 files changed, 300 insertions, 109 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 4a422ee4..1f99df49 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -26,6 +26,7 @@ import collections import codecs import json +import requests import gzip import cPickle as pickle import itertools @@ -43,8 +44,11 @@ from db import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench from utility import chunks +from utility import gen_geno_ob from utility.tools import locate, locate_ignore_error, flat_files +from wqflask.api import gen_menu + from maintenance import get_group_samplelists from MySQLdb import escape_string as escape @@ -52,7 +56,7 @@ from pprint import pformat as pf from db.gn_server import menu_main from db.call import fetchall,fetchone,fetch1 -from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists +from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL from utility.logger import getLogger logger = getLogger(__name__ ) @@ -63,7 +67,6 @@ DS_NAME_MAP = {} def create_dataset(dataset_name, dataset_type = None, get_samplelist = True, group_name = None): if not dataset_type: dataset_type = Dataset_Getter(dataset_name) - logger.debug("dataset_type", dataset_type) dataset_ob = DS_NAME_MAP[dataset_type] dataset_class = globals()[dataset_ob] @@ -90,12 +93,9 @@ Publish or ProbeSet. E.g. """ self.datasets = {} - if USE_GN_SERVER: - data = menu_main() - else: - file_name = "wqflask/static/new/javascript/dataset_menu_structure.json" - with open(file_name, 'r') as fh: - data = json.load(fh) + data = json.loads(requests.get(GN2_BASE_URL + "/api/v_pre1/gen_dropdown").content) + #data = gen_menu.gen_dropdown_json() + for species in data['datasets']: for group in data['datasets'][species]: @@ -109,11 +109,70 @@ Publish or ProbeSet. E.g. else: new_type = "ProbeSet" self.datasets[short_dataset_name] = new_type + # Set LOG_LEVEL_DEBUG=5 to see the following: logger.debugf(5, "datasets",self.datasets) def __call__(self, name): - return self.datasets[name] + if name not in self.datasets: + mrna_expr_query = """ + SELECT + ProbeSetFreeze.Id + FROM + ProbeSetFreeze + WHERE + ProbeSetFreeze.Name = "{0}" + """.format(name) + + results = g.db.execute(geno_query).fetchall() + if len(results): + self.datasets[name] = "ProbeSet" + return self.datasets[name] + + group_name = name.replace("Publish", "") + + pheno_query = """SELECT InfoFiles.GN_AccesionId + FROM InfoFiles, PublishFreeze, InbredSet + WHERE InbredSet.Name = '{0}' AND + PublishFreeze.InbredSetId = InbredSet.Id AND + InfoFiles.InfoPageName = PublishFreeze.Name""".format(group_name) + + results = g.db.execute(pheno_query).fetchall() + if len(results): + self.datasets[name] = "Publish" + return self.datasets[name] + + #ZS: For when there isn't an InfoFiles ID; not sure if this and the preceding query are both necessary + other_pheno_query = """SELECT PublishFreeze.Name + FROM PublishFreeze, InbredSet + WHERE InbredSet.Name = '{}' AND + PublishFreeze.InbredSetId = InbredSet.Id""".format(group_name) + + results = g.db.execute(other_pheno_query).fetchall() + if len(results): + self.datasets[name] = "Publish" + return self.datasets[name] + + geno_query = """ + SELECT + GenoFreezeId + FROM + GenoFreeze + WHERE + GenoFreeze.Name = "{0}" + {1} + """.format(name) + + results = g.db.execute(geno_query).fetchall() + if len(results): + self.datasets[name] = "Geno" + return self.datasets[name] + + #ZS: It shouldn't ever reach this + return None + + else: + return self.datasets[name] # Do the intensive work at startup one time only Dataset_Getter = Dataset_Types() @@ -170,31 +229,25 @@ class Markers(object): def __init__(self, name): json_data_fh = open(locate(name + ".json",'genotype/json')) - try: - markers = [] - with open(locate(name + "_snps.txt", 'r')) as bimbam_fh: + markers = [] + with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name), 'r') as bimbam_fh: + if len(bimbam_fh.readline().split(", ")) > 2: + delimiter = ", " + elif len(bimbam_fh.readline().split(",")) > 2: + delimiter = "," + elif len(bimbam_fh.readline().split("\t")) > 2: + delimiter = "\t" + else: + delimiter = " " + for line in bimbam_fh: marker = {} - if len(bimbam_fh[0].split(", ")) > 2: - delimiter = ", " - elif len(bimbam_fh[0].split(",")) > 2: - delimiter = "," - elif len(bimbam_fh[0].split("\t")) > 2: - delimiter = "\t" - else: - delimiter = " " - for line in bimbam_fh: - marker['name'] = line.split(delimiter)[0] - marker['Mb'] - marker['chr'] = line.split(delimiter)[2] - marker['cM'] - markers.append(marker) - #try: - # markers = json.load(json_data_fh) - except: - markers = [] + marker['name'] = line.split(delimiter)[0].rstrip() + marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000 + marker['chr'] = line.split(delimiter)[2].rstrip() + markers.append(marker) for marker in markers: - if (marker['chr'] != "X") and (marker['chr'] != "Y"): + if (marker['chr'] != "X") and (marker['chr'] != "Y") and (marker['chr'] != "M"): marker['chr'] = int(marker['chr']) marker['Mb'] = float(marker['Mb']) @@ -282,7 +335,6 @@ class DatasetGroup(object): """ def __init__(self, dataset, name=None): """This sets self.group and self.group_id""" - #logger.debug("DATASET NAME2:", dataset.name) if name == None: self.name, self.id, self.genetic_type = fetchone(dataset.query_for_group) else: @@ -294,7 +346,6 @@ class DatasetGroup(object): self.parlist = None self.get_f1_parent_strains() - self.accession_id = self.get_accession_id() self.mapping_id, self.mapping_names = self.get_mapping_methods() self.species = webqtlDatabaseFunction.retrieve_species(self.name) @@ -304,27 +355,15 @@ class DatasetGroup(object): self._datasets = None self.genofile = None - def get_accession_id(self): - results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where - InbredSet.Name = %s and - PublishFreeze.InbredSetId = InbredSet.Id and - InfoFiles.InfoPageName = PublishFreeze.Name and - PublishFreeze.public > 0 and - PublishFreeze.confidentiality < 1 order by - PublishFreeze.CreateTime desc""", (self.name)).fetchone() - - if results != None: - return str(results[0]) - else: - return "None" - def get_mapping_methods(self): mapping_id = g.db.execute("select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0] if mapping_id == "1": - mapping_names = ["QTLReaper", "PYLMM", "R/qtl"] + mapping_names = ["GEMMA", "QTLReaper", "R/qtl"] elif mapping_id == "2": mapping_names = ["GEMMA"] + elif mapping_id == "3": + mapping_names = ["R/qtl"] elif mapping_id == "4": mapping_names = ["GEMMA", "PLINK"] else: @@ -333,8 +372,6 @@ class DatasetGroup(object): return mapping_id, mapping_names def get_markers(self): - logger.debug("self.species is:", self.species) - def check_plink_gemma(): if flat_file_exists("mapping"): MAPPING_PATH = flat_files("mapping")+"/" @@ -364,30 +401,32 @@ class DatasetGroup(object): if maternal and paternal: self.parlist = [maternal, paternal] + def get_genofiles(self): + jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name) + try: + f = open(jsonfile) + except: + return None + jsondata = json.load(f) + return jsondata['genofile'] + def get_samplelist(self): result = None - key = "samplelist:v2:" + self.name + key = "samplelist:v3:" + self.name if USE_REDIS: result = Redis.get(key) if result is not None: - #logger.debug("Sample List Cache hit!!!") - #logger.debug("Before unjsonifying {}: {}".format(type(result), result)) self.samplelist = json.loads(result) - #logger.debug(" type: ", type(self.samplelist)) - #logger.debug(" self.samplelist: ", self.samplelist) else: logger.debug("Cache not hit") genotype_fn = locate_ignore_error(self.name+".geno",'genotype') - mapping_fn = locate_ignore_error(self.name+".fam",'mapping') - if mapping_fn: - self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn) - elif genotype_fn: + if genotype_fn: self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn) else: self.samplelist = None - logger.debug("Sample list: ",self.samplelist) + if USE_REDIS: Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) @@ -398,19 +437,27 @@ class DatasetGroup(object): [result.extend(l) for l in lists if l] return result - def read_genotype_file(self): + def read_genotype_file(self, use_reaper=False): '''Read genotype from .geno file instead of database''' #genotype_1 is Dataset Object without parents and f1 #genotype_2 is Dataset Object with parents and f1 (not for intercross) - genotype_1 = reaper.Dataset() + #genotype_1 = reaper.Dataset() # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: - full_filename = str(locate(self.genofile, 'genotype')) + if "RData" in self.genofile: #ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to other file types like RData + full_filename = str(locate(self.genofile.split(".")[0] + ".geno", 'genotype')) + else: + full_filename = str(locate(self.genofile, 'genotype')) else: full_filename = str(locate(self.name + '.geno', 'genotype')) - genotype_1.read(full_filename) + + if use_reaper: + genotype_1 = reaper.Dataset() + genotype_1.read(full_filename) + else: + genotype_1 = gen_geno_ob.genotype(full_filename) if genotype_1.type == "group" and self.parlist: genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) @@ -440,7 +487,8 @@ def datasets(group_name, this_group = None): WHERE PublishFreeze.InbredSetId = InbredSet.Id and InbredSet.Name = '%s' and PublishFreeze.public > %s - and PublishFreeze.confidentiality < 1) + and PublishFreeze.confidentiality < 1 + ORDER BY PublishFreeze.Id ASC) UNION (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name FROM GenoFreeze, InbredSet @@ -457,17 +505,28 @@ def datasets(group_name, this_group = None): and InbredSet.Name like %s and ProbeSetFreeze.public > %s and ProbeSetFreeze.confidentiality < 1 - ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId) + ORDER BY Tissue.Name, ProbeSetFreeze.OrderList DESC) ''' % (group_name, webqtlConfig.PUBLICTHRESH, group_name, webqtlConfig.PUBLICTHRESH, "'" + group_name + "'", webqtlConfig.PUBLICTHRESH)) - for dataset_item in the_results: + sorted_results = sorted(the_results, key=lambda kv: kv[0]) + + pheno_inserted = False #ZS: This is kind of awkward, but need to ensure Phenotypes show up before Genotypes in dropdown + geno_inserted = False + for dataset_item in sorted_results: tissue_name = dataset_item[0] dataset = dataset_item[1] dataset_short = dataset_item[2] if tissue_name in ['#PublishFreeze', '#GenoFreeze']: - dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) + if tissue_name == '#PublishFreeze' and (dataset_short == group_name + 'Publish'): + dataset_menu.insert(0, dict(tissue=None, datasets=[(dataset, dataset_short)])) + pheno_inserted = True + elif pheno_inserted and tissue_name == '#GenoFreeze': + dataset_menu.insert(1, dict(tissue=None, datasets=[(dataset, dataset_short)])) + geno_inserted = True + else: + dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) else: tissue_already_exists = False for i, tissue_dict in enumerate(dataset_menu): @@ -512,11 +571,12 @@ class DataSet(object): self.setup() if self.type == "Temp": #Need to supply group name as input if temp trait - self.group = DatasetGroup(self, group_name) # sets self.group and self.group_id and gets genotype + self.group = DatasetGroup(self, name=group_name) # sets self.group and self.group_id and gets genotype else: self.check_confidentiality() self.retrieve_other_names() self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype + self.accession_id = self.get_accession_id() if get_samplelist == True: self.group.get_samplelist() self.species = species.TheSpecies(self) @@ -531,6 +591,31 @@ class DataSet(object): def riset(): Weve_Renamed_This_As_Group + def get_accession_id(self): + if self.type == "Publish": + results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where + InbredSet.Name = %s and + PublishFreeze.InbredSetId = InbredSet.Id and + InfoFiles.InfoPageName = PublishFreeze.Name and + PublishFreeze.public > 0 and + PublishFreeze.confidentiality < 1 order by + PublishFreeze.CreateTime desc""", (self.group.name)).fetchone() + elif self.type == "Geno": + results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, GenoFreeze, InbredSet where + InbredSet.Name = %s and + GenoFreeze.InbredSetId = InbredSet.Id and + InfoFiles.InfoPageName = GenoFreeze.ShortName and + GenoFreeze.public > 0 and + GenoFreeze.confidentiality < 1 order by + GenoFreeze.CreateTime desc""", (self.group.name)).fetchone() + else: + results = None + + if results != None: + return str(results[0]) + else: + return "None" + def retrieve_other_names(self): """This method fetches the the dataset names in search_result. @@ -677,6 +762,7 @@ class PhenotypeDataSet(DataSet): 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', + 'PublishXRef.mean', 'Phenotype.Lab_code', 'Publication.PubMed_ID', 'Publication.Abstract', @@ -685,13 +771,14 @@ class PhenotypeDataSet(DataSet): 'PublishXRef.Id'] # Figure out what display_fields is - self.display_fields = ['name', + self.display_fields = ['name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', 'original_description', 'pre_publication_abbreviation', 'post_publication_abbreviation', + 'mean', 'lab_code', 'submitter', 'owner', 'authorized_users', @@ -906,6 +993,7 @@ class MrnaAssayDataSet(DataSet): 'blatseq', 'targetseq', 'chipid', 'comments', 'strand_probe', 'strand_gene', + 'proteinid', 'uniprotid', 'probe_set_target_region', 'probe_set_specificity', 'probe_set_blat_score', diff --git a/wqflask/base/species.py b/wqflask/base/species.py index 4ac2213c..6d99af65 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -14,10 +14,13 @@ from utility.logger import getLogger logger = getLogger(__name__ ) class TheSpecies(object): - def __init__(self, dataset): - self.dataset = dataset - #print("self.dataset is:", pf(self.dataset.__dict__)) - self.chromosomes = Chromosomes(self.dataset) + def __init__(self, dataset=None, species_name=None): + if species_name != None: + self.name = species_name + self.chromosomes = Chromosomes(species=self.name) + else: + self.dataset = dataset + self.chromosomes = Chromosomes(dataset=self.dataset) class IndChromosome(object): def __init__(self, name, length): @@ -30,11 +33,21 @@ class IndChromosome(object): return self.length / 1000000 class Chromosomes(object): - def __init__(self, dataset): - self.dataset = dataset + def __init__(self, dataset=None, species=None): self.chromosomes = collections.OrderedDict() + if species != None: + query = """ + Select + Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species + where + Chr_Length.SpeciesId = Species.SpeciesId AND + Species.Name = '%s' + Order by OrderId + """ % species.capitalize() + else: + self.dataset = dataset - query = """ + query = """ Select Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet where diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 3daf9ea9..e454c593 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -3,14 +3,18 @@ from __future__ import absolute_import, division, print_function import string import resource import codecs +import requests -from htmlgen import HTMLgen2 as HT +import redis +Redis = redis.StrictRedis() from base import webqtlConfig from base.webqtlCaseData import webqtlCaseData from base.data_set import create_dataset from db import webqtlDatabaseFunction from utility import webqtlUtil +from utility import hmac +from utility.tools import GN2_BASE_URL from wqflask import app @@ -23,8 +27,6 @@ from flask import Flask, g, request, url_for from utility.logger import getLogger logger = getLogger(__name__ ) -from wqflask import user_manager - class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -35,13 +37,15 @@ class GeneralTrait(object): def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. if kw.get('dataset_name'): - self.dataset = create_dataset(kw.get('dataset_name')) - #print(" in GeneralTrait created dataset:", self.dataset) + if kw.get('dataset_name') == "Temp": + temp_group = self.name.split("_")[2] + self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + else: + self.dataset = create_dataset(kw.get('dataset_name')) else: self.dataset = kw.get('dataset') - self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. - #print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -57,6 +61,7 @@ class GeneralTrait(object): self.num_overlap = None self.strand_probe = None self.symbol = None + self.display_name = self.name self.LRS_score_repr = "N/A" self.LRS_location_repr = "N/A" @@ -73,8 +78,8 @@ class GeneralTrait(object): # So we could add a simple if statement to short-circuit this if necessary if self.dataset.type != "Temp": self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info) - if get_sample_info != False: - self = retrieve_sample_data(self, self.dataset) + if get_sample_info != False: + self = retrieve_sample_data(self, self.dataset) def export_informative(self, include_variance=0): """ @@ -117,11 +122,36 @@ class GeneralTrait(object): @property def alias_fmt(self): '''Return a text formatted alias''' + + alias = 'Not available' if self.alias: alias = string.replace(self.alias, ";", " ") alias = string.join(string.split(alias), ", ") - else: - alias = 'Not available' + + return alias + + @property + def wikidata_alias_fmt(self): + '''Return a text formatted alias''' + + alias = 'Not available' + if self.symbol: + human_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) + mouse_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) + other_response = requests.get(GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) + + if human_response and mouse_response and other_response: + alias_list = json.loads(human_response.content) + json.loads(mouse_response.content) + json.loads(other_response.content) + + filtered_aliases = [] + seen = set() + for item in alias_list: + if item in seen: + continue + else: + filtered_aliases.append(item) + seen.add(item) + alias = "; ".join(filtered_aliases) return alias @@ -154,18 +184,27 @@ def retrieve_sample_data(trait, dataset, samplelist=None): if samplelist == None: samplelist = [] - results = dataset.retrieve_sample_data(trait.name) + if dataset.type == "Temp": + results = Redis.get(trait.name).split() + else: + results = dataset.retrieve_sample_data(trait.name) # Todo: is this necessary? If not remove trait.data.clear() - all_samples_ordered = dataset.group.all_samples_ordered() - if results: - for item in results: - name, value, variance, num_cases, name2 = item - if not samplelist or (samplelist and name in samplelist): - trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) + if dataset.type == "Temp": + all_samples_ordered = dataset.group.all_samples_ordered() + for i, item in enumerate(results): + try: + trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item)) + except: + pass + else: + for item in results: + name, value, variance, num_cases, name2 = item + if not samplelist or (samplelist and name in samplelist): + trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) return trait @app.route("/trait/get_sample_data") @@ -189,7 +228,8 @@ def get_sample_data(): trait_dict['symbol'] = trait_ob.symbol trait_dict['location'] = trait_ob.location_repr elif trait_ob.dataset.type == "Publish": - trait_dict['pubmed_link'] = trait_ob.pubmed_link + if trait_ob.pubmed_id: + trait_dict['pubmed_link'] = trait_ob.pubmed_link trait_dict['pubmed_text'] = trait_ob.pubmed_text return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems() }]) @@ -205,6 +245,7 @@ def jsonable(trait): return dict(name=trait.name, symbol=trait.symbol, dataset=dataset.name, + dataset_name = dataset.shortname, description=trait.description_display, mean=trait.mean, location=trait.location_repr, @@ -216,7 +257,9 @@ def jsonable(trait): if trait.pubmed_id: return dict(name=trait.name, dataset=dataset.name, + dataset_name = dataset.shortname, description=trait.description_display, + abbreviation=trait.abbreviation, authors=trait.authors, pubmed_text=trait.pubmed_text, pubmed_link=trait.pubmed_link, @@ -227,7 +270,9 @@ def jsonable(trait): else: return dict(name=trait.name, dataset=dataset.name, + dataset_name = dataset.shortname, description=trait.description_display, + abbreviation=trait.abbreviation, authors=trait.authors, pubmed_text=trait.pubmed_text, lrs_score=trait.LRS_score_repr, @@ -237,6 +282,7 @@ def jsonable(trait): elif dataset.type == "Geno": return dict(name=trait.name, dataset=dataset.name, + dataset_name = dataset.shortname, location=trait.location_repr ) else: @@ -258,7 +304,7 @@ def jsonable_table_row(trait, dataset_name, index): additive = "N/A" else: additive = "%.3f" % round(float(trait.additive), 2) - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.symbol, @@ -274,7 +320,7 @@ def jsonable_table_row(trait, dataset_name, index): else: additive = "%.2f" % round(float(trait.additive), 2) if trait.pubmed_id: - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.description_display, @@ -284,7 +330,7 @@ def jsonable_table_row(trait, dataset_name, index): trait.LRS_location_repr, additive] else: - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.description_display, @@ -294,7 +340,7 @@ def jsonable_table_row(trait, dataset_name, index): trait.LRS_location_repr, additive] elif dataset.type == "Geno": - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.location_repr] @@ -307,21 +353,22 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if dataset.type == 'Publish': query = """ SELECT - PublishXRef.Id, Publication.PubMed_ID, + PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID, Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description, - Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, + Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, PublishXRef.mean, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users, Publication.Authors, Publication.Title, Publication.Abstract, Publication.Journal, Publication.Volume, Publication.Pages, Publication.Month, Publication.Year, PublishXRef.Sequence, Phenotype.Units, PublishXRef.comments FROM - PublishXRef, Publication, Phenotype, PublishFreeze + PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet WHERE PublishXRef.Id = %s AND Phenotype.Id = PublishXRef.PhenotypeId AND Publication.Id = PublishXRef.PublicationId AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishXRef.InbredSetId = InbredSet.Id AND PublishFreeze.Id = %s """ % (trait.name, dataset.id) @@ -371,17 +418,25 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait_info = g.db.execute(query, (string.join(dataset.display_fields,','), dataset.type, trait.name)).fetchone() + if trait_info: trait.haveinfo = True #XZ: assign SQL query result to trait attributes. for i, field in enumerate(dataset.display_fields): holder = trait_info[i] + # if isinstance(trait_info[i], basestring): + # logger.debug("HOLDER:", holder) + # logger.debug("HOLDER2:", holder.decode(encoding='latin1')) + # holder = unicode(trait_info[i], "utf-8", "ignore") if isinstance(trait_info[i], basestring): - holder = unicode(trait_info[i], "utf-8", "ignore") + holder = holder.encode('latin1') setattr(trait, field, holder) if dataset.type == 'Publish': + if trait.group_code: + trait.display_name = trait.group_code + "_" + str(trait.name) + trait.confidential = 0 if trait.pre_publication_description and not trait.pubmed_id: trait.confidential = 1 @@ -392,6 +447,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): #phenotype traits, then display the pre-publication description instead #of the post-publication description if trait.confidential: + trait.abbreviation = trait.pre_publication_abbreviation trait.description_display = trait.pre_publication_description #if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( @@ -401,11 +457,16 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): # # description = self.pre_publication_description else: + trait.abbreviation = trait.post_publication_abbreviation if description: trait.description_display = description.strip() else: trait.description_display = "" + trait.abbreviation = unicode(str(trait.abbreviation).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.description_display = unicode(str(trait.description_display).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.authors = unicode(str(trait.authors).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + if not trait.year.isdigit(): trait.pubmed_text = "N/A" else: @@ -443,7 +504,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): #LRS and its location trait.LRS_score_repr = "N/A" trait.LRS_location_repr = "N/A" + trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" if dataset.type == 'ProbeSet' and not trait.cellid: + trait.mean = "" query = """ SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive @@ -474,9 +537,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.mean = trait.additive = "" - if dataset.type == 'Publish': query = """ diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 24de8dcb..d8487f01 100644 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -49,7 +49,7 @@ class webqtlCaseData(object): if self.variance != None: str += " variance=%2.3f" % self.variance if self.num_cases: - str += " ndata=%d" % self.num_cases + str += " ndata=%s" % self.num_cases if self.name: str += " name=%s" % self.name if self.name2: @@ -77,3 +77,10 @@ class webqtlCaseData(object): return "%2.3f" % self.variance else: return "x" + + @property + def display_num_cases(self): + if self.num_cases != None: + return "%s" % self.num_cases + else: + return "x" diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 4708bf0a..018d5d54 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -34,6 +34,30 @@ PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db= UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' +GENOMEBROWSER_URL="https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s" +NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s" +GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s" +OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s" +UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s" +HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/homologene/?term=%s" +GENOTATION_URL = "http://www.genotation.org/Getd2g.pl?gene_list=%s" +GTEX_URL = "https://www.gtexportal.org/home/gene/%s" +GENEBRIDGE_URL = "https://www.systems-genetics.org/modules_by_gene/%s?organism=%s" +GENEMANIA_URL = "https://genemania.org/search/%s/%s" +UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s" +BIOGPS_URL = "http://biogps.org/?org=%s#goto=genereport&id=%s" +STRING_URL = "http://string-db.org/newstring_cgi/show_network_section.pl?identifier=%s" +PANTHER_URL = "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue=%s" +GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s" +ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s" +EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s" +WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s" +ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s" +DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s' +PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s" +OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s" +UNIPROT_URL = "https://www.uniprot.org/uniprot/%s" +RGD_URL = "https://rgd.mcw.edu/rgdweb/elasticResults.html?term=%s&category=Gene&species=%s" # Temporary storage (note that this TMPDIR can be set as an # environment variable - use utility.tools.TEMPDIR when you @@ -68,4 +92,3 @@ if not valid_path(JSON_GENODIR): PORTADDR = "http://50.16.251.170" INFOPAGEHREF = '/dbdoc/%s.html' CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' -SCRIPTFILE = 'main.py' |