diff options
author | zsloan | 2020-04-21 16:54:41 -0500 |
---|---|---|
committer | GitHub | 2020-04-21 16:54:41 -0500 |
commit | 821465df32bfcbab73a654d1e2386f2a07f4695f (patch) | |
tree | 871c8502774cb7a8fce950f46285f62ba3d0b113 /wqflask/base | |
parent | d249d8fa90eabd47020926fdadbfe22ac2bba900 (diff) | |
parent | cf8b4c21d81efaa01d347478dc126e6d9b53f7a9 (diff) | |
download | genenetwork2-821465df32bfcbab73a654d1e2386f2a07f4695f.tar.gz |
Merge pull request #2 from genenetwork/testing
Pulling changes from main branch
Diffstat (limited to 'wqflask/base')
-rw-r--r-- | wqflask/base/data_set.py | 54 | ||||
-rw-r--r-- | wqflask/base/trait.py | 68 | ||||
-rw-r--r-- | wqflask/base/webqtlConfig.py | 6 |
3 files changed, 101 insertions, 27 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index d766e284..ebf3f021 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -26,6 +26,7 @@ import collections import codecs import json +import requests import gzip import cPickle as pickle import itertools @@ -46,6 +47,8 @@ from utility import chunks from utility import gen_geno_ob from utility.tools import locate, locate_ignore_error, flat_files +from wqflask.api import gen_menu + from maintenance import get_group_samplelists from MySQLdb import escape_string as escape @@ -61,7 +64,7 @@ logger = getLogger(__name__ ) # Each subclass will add to this DS_NAME_MAP = {} -def create_dataset(dataset_name, dataset_type = None, get_samplelist = True, group_name = None): +def create_dataset(dataset_name, rebuild=True, dataset_type = None, get_samplelist = True, group_name = None): if not dataset_type: dataset_type = Dataset_Getter(dataset_name) logger.debug("dataset_type", dataset_type) @@ -75,7 +78,7 @@ def create_dataset(dataset_name, dataset_type = None, get_samplelist = True, gro class Dataset_Types(object): - def __init__(self): + def __init__(self, rebuild=False): """Create a dictionary of samples where the value is set to Geno, Publish or ProbeSet. E.g. @@ -91,8 +94,10 @@ Publish or ProbeSet. E.g. """ self.datasets = {} - if USE_GN_SERVER: - data = menu_main() + if rebuild: #ZS: May make this the only option + data = json.loads(requests.get("http://gn2.genenetwork.org/api/v_pre1/gen_dropdown").content) + logger.debug("THE DATA:", data) + #data = gen_menu.gen_dropdown_json() else: file_name = "wqflask/static/new/javascript/dataset_menu_structure.json" with open(file_name, 'r') as fh: @@ -190,7 +195,7 @@ class Markers(object): markers.append(marker) for marker in markers: - if (marker['chr'] != "X") and (marker['chr'] != "Y"): + if (marker['chr'] != "X") and (marker['chr'] != "Y") and (marker['chr'] != "M"): marker['chr'] = int(marker['chr']) marker['Mb'] = float(marker['Mb']) @@ -302,9 +307,11 @@ class DatasetGroup(object): mapping_id = g.db.execute("select MappingMethodId from InbredSet where Name= '%s'" % self.name).fetchone()[0] if mapping_id == "1": - mapping_names = ["QTLReaper", "R/qtl"] + mapping_names = ["GEMMA", "QTLReaper", "R/qtl"] elif mapping_id == "2": mapping_names = ["GEMMA"] + elif mapping_id == "3": + mapping_names = ["R/qtl"] elif mapping_id == "4": mapping_names = ["GEMMA", "PLINK"] else: @@ -342,9 +349,18 @@ class DatasetGroup(object): if maternal and paternal: self.parlist = [maternal, paternal] + def get_genofiles(self): + jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name) + try: + f = open(jsonfile) + except: + return None + jsondata = json.load(f) + return jsondata['genofile'] + def get_samplelist(self): result = None - key = "samplelist:v2:" + self.name + key = "samplelist:v3:" + self.name if USE_REDIS: result = Redis.get(key) @@ -378,7 +394,10 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string if self.genofile: - full_filename = str(locate(self.genofile, 'genotype')) + if "RData" in self.genofile: #ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to other file types like RData + full_filename = str(locate(self.genofile.split(".")[0] + ".geno", 'genotype')) + else: + full_filename = str(locate(self.genofile, 'genotype')) else: full_filename = str(locate(self.name + '.geno', 'genotype')) @@ -416,7 +435,8 @@ def datasets(group_name, this_group = None): WHERE PublishFreeze.InbredSetId = InbredSet.Id and InbredSet.Name = '%s' and PublishFreeze.public > %s - and PublishFreeze.confidentiality < 1) + and PublishFreeze.confidentiality < 1 + ORDER BY PublishFreeze.Id ASC) UNION (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name FROM GenoFreeze, InbredSet @@ -440,12 +460,21 @@ def datasets(group_name, this_group = None): sorted_results = sorted(the_results, key=lambda kv: kv[0]) + pheno_inserted = False #ZS: This is kind of awkward, but need to ensure Phenotypes show up before Genotypes in dropdown + geno_inserted = False for dataset_item in sorted_results: tissue_name = dataset_item[0] dataset = dataset_item[1] dataset_short = dataset_item[2] if tissue_name in ['#PublishFreeze', '#GenoFreeze']: - dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) + if tissue_name == '#PublishFreeze' and (dataset_short == group_name + 'Publish'): + dataset_menu.insert(0, dict(tissue=None, datasets=[(dataset, dataset_short)])) + pheno_inserted = True + elif pheno_inserted and tissue_name == '#GenoFreeze': + dataset_menu.insert(1, dict(tissue=None, datasets=[(dataset, dataset_short)])) + geno_inserted = True + else: + dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) else: tissue_already_exists = False for i, tissue_dict in enumerate(dataset_menu): @@ -681,6 +710,7 @@ class PhenotypeDataSet(DataSet): 'Phenotype.Pre_publication_description', 'Phenotype.Pre_publication_abbreviation', 'Phenotype.Post_publication_abbreviation', + 'PublishXRef.mean', 'Phenotype.Lab_code', 'Publication.PubMed_ID', 'Publication.Abstract', @@ -689,13 +719,14 @@ class PhenotypeDataSet(DataSet): 'PublishXRef.Id'] # Figure out what display_fields is - self.display_fields = ['name', + self.display_fields = ['name', 'group_code', 'pubmed_id', 'pre_publication_description', 'post_publication_description', 'original_description', 'pre_publication_abbreviation', 'post_publication_abbreviation', + 'mean', 'lab_code', 'submitter', 'owner', 'authorized_users', @@ -910,6 +941,7 @@ class MrnaAssayDataSet(DataSet): 'blatseq', 'targetseq', 'chipid', 'comments', 'strand_probe', 'strand_gene', + 'proteinid', 'uniprotid', 'probe_set_target_region', 'probe_set_specificity', 'probe_set_blat_score', diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 6fecf725..5525472e 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function import string import resource import codecs +import requests import redis Redis = redis.StrictRedis() @@ -12,6 +13,7 @@ from base.webqtlCaseData import webqtlCaseData from base.data_set import create_dataset from db import webqtlDatabaseFunction from utility import webqtlUtil +from utility import hmac from wqflask import app @@ -24,8 +26,6 @@ from flask import Flask, g, request, url_for from utility.logger import getLogger logger = getLogger(__name__ ) -from wqflask import user_manager - class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -60,6 +60,7 @@ class GeneralTrait(object): self.num_overlap = None self.strand_probe = None self.symbol = None + self.display_name = self.name self.LRS_score_repr = "N/A" self.LRS_location_repr = "N/A" @@ -120,11 +121,36 @@ class GeneralTrait(object): @property def alias_fmt(self): '''Return a text formatted alias''' + + alias = 'Not available' if self.alias: alias = string.replace(self.alias, ";", " ") alias = string.join(string.split(alias), ", ") - else: - alias = 'Not available' + + return alias + + @property + def wikidata_alias_fmt(self): + '''Return a text formatted alias''' + + alias = 'Not available' + if self.symbol: + human_response = requests.get("http://gn2.genenetwork.org/gn3/gene/aliases/" + self.symbol.upper()) + mouse_response = requests.get("http://gn2.genenetwork.org/gn3/gene/aliases/" + self.symbol.capitalize()) + other_response = requests.get("http://gn2.genenetwork.org/gn3/gene/aliases/" + self.symbol.lower()) + + if human_response and mouse_response and other_response: + alias_list = json.loads(human_response.content) + json.loads(mouse_response.content) + json.loads(other_response.content) + + filtered_aliases = [] + seen = set() + for item in alias_list: + if item in seen: + continue + else: + filtered_aliases.append(item) + seen.add(item) + alias = "; ".join(filtered_aliases) return alias @@ -277,7 +303,7 @@ def jsonable_table_row(trait, dataset_name, index): additive = "N/A" else: additive = "%.3f" % round(float(trait.additive), 2) - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.symbol, @@ -293,7 +319,7 @@ def jsonable_table_row(trait, dataset_name, index): else: additive = "%.2f" % round(float(trait.additive), 2) if trait.pubmed_id: - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.description_display, @@ -303,7 +329,7 @@ def jsonable_table_row(trait, dataset_name, index): trait.LRS_location_repr, additive] else: - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.description_display, @@ -313,7 +339,7 @@ def jsonable_table_row(trait, dataset_name, index): trait.LRS_location_repr, additive] elif dataset.type == "Geno": - return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', + return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">', index, '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>', trait.location_repr] @@ -326,21 +352,22 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if dataset.type == 'Publish': query = """ SELECT - PublishXRef.Id, Publication.PubMed_ID, + PublishXRef.Id, InbredSet.InbredSetCode, Publication.PubMed_ID, Phenotype.Pre_publication_description, Phenotype.Post_publication_description, Phenotype.Original_description, - Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, + Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, PublishXRef.mean, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, Phenotype.Authorized_Users, Publication.Authors, Publication.Title, Publication.Abstract, Publication.Journal, Publication.Volume, Publication.Pages, Publication.Month, Publication.Year, PublishXRef.Sequence, Phenotype.Units, PublishXRef.comments FROM - PublishXRef, Publication, Phenotype, PublishFreeze + PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet WHERE PublishXRef.Id = %s AND Phenotype.Id = PublishXRef.PhenotypeId AND Publication.Id = PublishXRef.PublicationId AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishXRef.InbredSetId = InbredSet.Id AND PublishFreeze.Id = %s """ % (trait.name, dataset.id) @@ -390,17 +417,25 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait_info = g.db.execute(query, (string.join(dataset.display_fields,','), dataset.type, trait.name)).fetchone() + if trait_info: trait.haveinfo = True #XZ: assign SQL query result to trait attributes. for i, field in enumerate(dataset.display_fields): holder = trait_info[i] + # if isinstance(trait_info[i], basestring): + # logger.debug("HOLDER:", holder) + # logger.debug("HOLDER2:", holder.decode(encoding='latin1')) + # holder = unicode(trait_info[i], "utf-8", "ignore") if isinstance(trait_info[i], basestring): - holder = unicode(trait_info[i], "utf-8", "ignore") + holder = holder.encode('latin1') setattr(trait, field, holder) if dataset.type == 'Publish': + if trait.group_code: + trait.display_name = trait.group_code + "_" + str(trait.name) + trait.confidential = 0 if trait.pre_publication_description and not trait.pubmed_id: trait.confidential = 1 @@ -427,6 +462,10 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.description_display = "" + trait.abbreviation = unicode(str(trait.abbreviation).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.description_display = unicode(str(trait.description_display).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + trait.authors = unicode(str(trait.authors).strip(codecs.BOM_UTF8), 'utf-8', errors="replace") + if not trait.year.isdigit(): trait.pubmed_text = "N/A" else: @@ -464,7 +503,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): #LRS and its location trait.LRS_score_repr = "N/A" trait.LRS_location_repr = "N/A" + trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" if dataset.type == 'ProbeSet' and not trait.cellid: + trait.mean = "" query = """ SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive @@ -495,9 +536,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" else: trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.mean = trait.additive = "" - if dataset.type == 'Publish': query = """ diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index a08acb0a..018d5d54 100644 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -53,7 +53,11 @@ ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=% EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s" WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s" ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s" -DBSNP = 'http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s' +DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s' +PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s" +OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s" +UNIPROT_URL = "https://www.uniprot.org/uniprot/%s" +RGD_URL = "https://rgd.mcw.edu/rgdweb/elasticResults.html?term=%s&category=Gene&species=%s" # Temporary storage (note that this TMPDIR can be set as an # environment variable - use utility.tools.TEMPDIR when you |