diff options
Diffstat (limited to 'wqflask/base')
-rw-r--r-- | wqflask/base/GeneralObject.py | 66 | ||||
-rw-r--r-- | wqflask/base/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/base/data_set/__init__.py | 124 | ||||
-rw-r--r-- | wqflask/base/data_set/dataset.py | 305 | ||||
-rw-r--r-- | wqflask/base/data_set/datasetgroup.py | 195 | ||||
-rw-r--r-- | wqflask/base/data_set/datasettype.py | 117 | ||||
-rw-r--r-- | wqflask/base/data_set/genotypedataset.py | 76 | ||||
-rw-r--r-- | wqflask/base/data_set/markers.py | 96 | ||||
-rw-r--r-- | wqflask/base/data_set/mrnaassaydataset.py | 179 | ||||
-rw-r--r-- | wqflask/base/data_set/phenotypedataset.py | 134 | ||||
-rw-r--r-- | wqflask/base/data_set/probably_unused.py | 35 | ||||
-rw-r--r-- | wqflask/base/data_set/tempdataset.py | 23 | ||||
-rw-r--r-- | wqflask/base/data_set/utils.py | 80 | ||||
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 102 | ||||
-rw-r--r-- | wqflask/base/species.py | 59 | ||||
-rw-r--r-- | wqflask/base/trait.py | 613 | ||||
-rw-r--r-- | wqflask/base/webqtlCaseData.py | 81 | ||||
-rw-r--r-- | wqflask/base/webqtlConfig.py | 107 |
18 files changed, 0 insertions, 2392 deletions
diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py deleted file mode 100644 index ce8e60b8..00000000 --- a/wqflask/base/GeneralObject.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Affero General Public License -# as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU Affero General Public License for more details. -# -# This program is available from Source Forge: at GeneNetwork Project -# (sourceforge.net/projects/genenetwork/). -# -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# -# -# -# This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by GeneNetwork Core Team 2010/10/20 - -class GeneralObject: - """ - Base class to define an Object. 
- a = [Spam(1, 4), Spam(9, 3), Spam(4,6)] - a.sort(key = lambda x: x.eggs) - """ - - def __init__(self, *args, **kw): - self.contents = list(args) - for name, value in list(kw.items()): - setattr(self, name, value) - - def __setitem__(self, key, value): - setattr(self, key, value) - - def __getitem__(self, key): - return getattr(self, key) - - def __getattr__(self, key): - return eval("self.__dict__.%s" % key) - - def __len__(self): - return len(self.__dict__) - 1 - - def __str__(self): - s = '' - for key in list(self.__dict__.keys()): - if key != 'contents': - s += '%s = %s\n' % (key, self.__dict__[key]) - return s - - def __repr__(self): - s = '' - for key in list(self.__dict__.keys()): - s += '%s = %s\n' % (key, self.__dict__[key]) - return s - - def __eq__(self, other): - return (len(list(self.__dict__.keys())) - == len(list(other.__dict__.keys()))) diff --git a/wqflask/base/__init__.py b/wqflask/base/__init__.py deleted file mode 100644 index e69de29b..00000000 --- a/wqflask/base/__init__.py +++ /dev/null diff --git a/wqflask/base/data_set/__init__.py b/wqflask/base/data_set/__init__.py deleted file mode 100644 index 69eaab53..00000000 --- a/wqflask/base/data_set/__init__.py +++ /dev/null @@ -1,124 +0,0 @@ -"The data_set package ..." 
- -# builtins imports -import json -import pickle as pickle - -# 3rd-party imports -from redis import Redis - -# local imports -from .dataset import DataSet -from base import webqtlConfig -from utility.tools import get_setting, USE_REDIS -from .datasettype import DatasetType -from .tempdataset import TempDataSet -from .datasetgroup import DatasetGroup -from .utils import query_table_timestamp -from .genotypedataset import GenotypeDataSet -from .phenotypedataset import PhenotypeDataSet -from .mrnaassaydataset import MrnaAssayDataSet -from wqflask.database import database_connection - -# Used by create_database to instantiate objects -# Each subclass will add to this - -DS_NAME_MAP = { - "Temp": "TempDataSet", - "Geno": "GenotypeDataSet", - "Publish": "PhenotypeDataSet", - "ProbeSet": "MrnaAssayDataSet" -} - -def __dataset_type__(dataset_name): - """Get dataset type.""" - if "Temp" in dataset_name: - return "Temp" - if "Geno" in dataset_name: - return "Geno" - if "Publish" in dataset_name: - return "Publish" - return "ProbeSet" - -def create_dataset(dataset_name, dataset_type=None, - get_samplelist=True, group_name=None, redis_conn=Redis()): - dataset_type = dataset_type or __dataset_type__(dataset_name) - - dataset_ob = DS_NAME_MAP[dataset_type] - dataset_class = globals()[dataset_ob] - if dataset_type == "Temp": - return dataset_class(dataset_name, get_samplelist, group_name) - else: - return dataset_class(dataset_name, get_samplelist) - -def datasets(group_name, this_group=None, redis_conn=Redis()): - key = "group_dataset_menu:v2:" + group_name - dataset_menu = [] - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute(''' - (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name - FROM PublishFreeze,InbredSet - WHERE PublishFreeze.InbredSetId = InbredSet.Id - and InbredSet.Name = '%s' - ORDER BY PublishFreeze.Id ASC) - UNION - (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name - FROM GenoFreeze, 
InbredSet - WHERE GenoFreeze.InbredSetId = InbredSet.Id - and InbredSet.Name = '%s') - UNION - (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name - FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue - WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id - and ProbeFreeze.TissueId = Tissue.Id - and ProbeFreeze.InbredSetId = InbredSet.Id - and InbredSet.Name like %s - ORDER BY Tissue.Name, ProbeSetFreeze.OrderList DESC) - ''' % (group_name, - group_name, - "'" + group_name + "'")) - the_results = cursor.fetchall() - - sorted_results = sorted(the_results, key=lambda kv: kv[0]) - - # ZS: This is kind of awkward, but need to ensure Phenotypes show up before Genotypes in dropdown - pheno_inserted = False - geno_inserted = False - for dataset_item in sorted_results: - tissue_name = dataset_item[0] - dataset = dataset_item[1] - dataset_short = dataset_item[2] - if tissue_name in ['#PublishFreeze', '#GenoFreeze']: - if tissue_name == '#PublishFreeze' and (dataset_short == group_name + 'Publish'): - dataset_menu.insert( - 0, dict(tissue=None, datasets=[(dataset, dataset_short)])) - pheno_inserted = True - elif pheno_inserted and tissue_name == '#GenoFreeze': - dataset_menu.insert( - 1, dict(tissue=None, datasets=[(dataset, dataset_short)])) - geno_inserted = True - else: - dataset_menu.append( - dict(tissue=None, datasets=[(dataset, dataset_short)])) - else: - tissue_already_exists = False - for i, tissue_dict in enumerate(dataset_menu): - if tissue_dict['tissue'] == tissue_name: - tissue_already_exists = True - break - - if tissue_already_exists: - dataset_menu[i]['datasets'].append((dataset, dataset_short)) - else: - dataset_menu.append(dict(tissue=tissue_name, - datasets=[(dataset, dataset_short)])) - - if USE_REDIS: - redis_conn.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL)) - redis_conn.expire(key, 60 * 5) - - if this_group != None: - this_group._datasets = dataset_menu - return this_group._datasets - else: - return dataset_menu diff --git 
a/wqflask/base/data_set/dataset.py b/wqflask/base/data_set/dataset.py deleted file mode 100644 index 435d74a9..00000000 --- a/wqflask/base/data_set/dataset.py +++ /dev/null @@ -1,305 +0,0 @@ -"Base Dataset class ..." - -import math -import collections - -from redis import Redis - -from base import species -from utility import chunks -from utility.tools import get_setting -from gn3.monads import MonadicDict, query_sql -from pymonad.maybe import Maybe, Nothing -from .datasetgroup import DatasetGroup -from wqflask.database import database_connection -from utility.db_tools import escape, mescape, create_in_clause -from .utils import fetch_cached_results, cache_dataset_results - - -class DataSet: - """ - DataSet class defines a dataset in webqtl, can be either Microarray, - Published phenotype, genotype, or user input dataset(temp) - - """ - - def __init__(self, name, get_samplelist=True, group_name=None, redis_conn=Redis()): - - assert name, "Need a name" - self.name = name - self.id = None - self.shortname = None - self.fullname = None - self.type = None - self.data_scale = None # ZS: For example log2 - self.accession_id = Nothing - - self.setup() - - if self.type == "Temp": # Need to supply group name as input if temp trait - # sets self.group and self.group_id and gets genotype - self.group = DatasetGroup(self, name=group_name) - else: - self.check_confidentiality() - self.retrieve_other_names() - # sets self.group and self.group_id and gets genotype - self.group = DatasetGroup(self) - self.accession_id = self.get_accession_id() - if get_samplelist == True: - self.group.get_samplelist(redis_conn) - self.species = species.TheSpecies(dataset=self) - - def as_monadic_dict(self): - _result = MonadicDict({ - 'name': self.name, - 'shortname': self.shortname, - 'fullname': self.fullname, - 'type': self.type, - 'data_scale': self.data_scale, - 'group': self.group.name - }) - _result["accession_id"] = self.accession_id - return _result - - def get_accession_id(self) -> 
Maybe[str]: - """Get the accession_id of this dataset depending on the - dataset type.""" - __query = "" - with database_connection(get_setting("SQL_URI")) as conn: - if self.type == "Publish": - __query = ( - "SELECT InfoFiles.GN_AccesionId AS accession_id FROM " - "InfoFiles, PublishFreeze, InbredSet " - "WHERE InbredSet.Name = " - f"'{conn.escape_string(self.group.name).decode()}' " - "AND PublishFreeze.InbredSetId = InbredSet.Id " - "AND InfoFiles.InfoPageName = PublishFreeze.Name " - "AND PublishFreeze.public > 0 AND " - "PublishFreeze.confidentiality < 1 " - "ORDER BY PublishFreeze.CreateTime DESC" - ) - elif self.type == "Geno": - __query = ( - "SELECT InfoFiles.GN_AccesionId AS accession_id FROM " - "InfoFiles, GenoFreeze, InbredSet WHERE InbredSet.Name = " - f"'{conn.escape_string(self.group.name).decode()}' AND " - "GenoFreeze.InbredSetId = InbredSet.Id " - "AND InfoFiles.InfoPageName = GenoFreeze.ShortName " - "AND GenoFreeze.public > 0 AND " - "GenoFreeze.confidentiality < 1 " - "ORDER BY GenoFreeze.CreateTime DESC" - ) - elif self.type == "ProbeSet": - __query = ( - "SELECT InfoFiles.GN_AccesionId AS accession_id " - "FROM InfoFiles WHERE InfoFiles.InfoPageName = " - f"'{conn.escape_string(self.name).decode()}'" - ) - else: # The Value passed is not present - raise LookupError - - # Should there be an empty row, query_sql returns a None - # value instead of yielding a value; this block - # accomodates this non-intuitive edge-case - for result in query_sql(conn, __query) or (): - return result["accession_id"] - return Nothing - - def retrieve_other_names(self): - """This method fetches the the dataset names in search_result. - - If the data set name parameter is not found in the 'Name' field of - the data set table, check if it is actually the FullName or - ShortName instead. - - This is not meant to retrieve the data set info if no name at - all is passed. 
- - """ - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - try: - if self.type == "ProbeSet": - cursor.execute( - "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, " - "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, " - "ProbeSetFreeze.DataScale, Tissue.Name " - "FROM ProbeSetFreeze, ProbeFreeze, Tissue " - "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " - "AND ProbeFreeze.TissueId = Tissue.Id " - "AND (ProbeSetFreeze.Name = %s OR " - "ProbeSetFreeze.FullName = %s " - "OR ProbeSetFreeze.ShortName = %s)", - (self.name,)*3) - (self.id, self.name, self.fullname, self.shortname, - self.data_scale, self.tissue) = cursor.fetchone() - else: - self.tissue = "N/A" - cursor.execute( - "SELECT Id, Name, FullName, ShortName " - f"FROM {self.type}Freeze " - "WHERE (Name = %s OR FullName = " - "%s OR ShortName = %s)", - (self.name,)*3) - (self.id, self.name, self.fullname, - self.shortname) = cursor.fetchone() - except TypeError: - pass - - def chunk_dataset(self, dataset, n): - - results = {} - traits_name_dict = () - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT ProbeSetXRef.DataId,ProbeSet.Name " - "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze " - "WHERE ProbeSetFreeze.Name = %s AND " - "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " - "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id", - (self.name,)) - # should cache this - traits_name_dict = dict(cursor.fetchall()) - - for i in range(0, len(dataset), n): - matrix = list(dataset[i:i + n]) - trait_name = traits_name_dict[matrix[0][0]] - - my_values = [value for (trait_name, strain, value) in matrix] - results[trait_name] = my_values - return results - - def get_probeset_data(self, sample_list=None, trait_ids=None): - - # improvement of get trait data--->>> - if sample_list: - self.samplelist = sample_list - - else: - self.samplelist = self.group.samplelist - - if self.group.parlist != None and self.group.f1list != None: 
- if (self.group.parlist + self.group.f1list) in self.samplelist: - self.samplelist += self.group.parlist + self.group.f1list - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT Strain.Name, Strain.Id FROM " - "Strain, Species WHERE Strain.Name IN " - f"{create_in_clause(self.samplelist)} " - "AND Strain.SpeciesId=Species.Id AND " - "Species.name = %s", (self.group.species,) - ) - results = dict(cursor.fetchall()) - sample_ids = [results[item] for item in self.samplelist] - - sorted_samplelist = [strain_name for strain_name, strain_id in sorted( - results.items(), key=lambda item: item[1])] - - cursor.execute( - "SELECT * from ProbeSetData WHERE StrainID IN " - f"{create_in_clause(sample_ids)} AND id IN " - "(SELECT ProbeSetXRef.DataId FROM " - "(ProbeSet, ProbeSetXRef, ProbeSetFreeze) " - "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " - "AND ProbeSetFreeze.Name = %s AND " - "ProbeSet.Id = ProbeSetXRef.ProbeSetId)", - (self.name,) - ) - - query_results = list(cursor.fetchall()) - data_results = self.chunk_dataset(query_results, len(sample_ids)) - self.samplelist = sorted_samplelist - self.trait_data = data_results - - def get_trait_data(self, sample_list=None): - if sample_list: - self.samplelist = sample_list - else: - self.samplelist = self.group.samplelist - - if self.group.parlist != None and self.group.f1list != None: - if (self.group.parlist + self.group.f1list) in self.samplelist: - self.samplelist += self.group.parlist + self.group.f1list - - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT Strain.Name, Strain.Id FROM Strain, Species " - f"WHERE Strain.Name IN {create_in_clause(self.samplelist)} " - "AND Strain.SpeciesId=Species.Id " - "AND Species.name = %s", - (self.group.species,) - ) - results = dict(cursor.fetchall()) - sample_ids = [ - sample_id for sample_id in - (results.get(item) for item in self.samplelist - if 
item is not None) - if sample_id is not None - ] - - # MySQL limits the number of tables that can be used in a join to 61, - # so we break the sample ids into smaller chunks - # Postgres doesn't have that limit, so we can get rid of this after we transition - chunk_size = 50 - number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) - - cached_results = fetch_cached_results(self.name, self.type, self.samplelist) - - if cached_results is None: - trait_sample_data = [] - for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): - if self.type == "Publish": - dataset_type = "Phenotype" - else: - dataset_type = self.type - temp = ['T%s.value' % item for item in sample_ids_step] - if self.type == "Publish": - query = "SELECT {}XRef.Id".format(escape(self.type)) - else: - query = "SELECT {}.Name".format(escape(dataset_type)) - data_start_pos = 1 - if len(temp) > 0: - query = query + ", " + ', '.join(temp) - query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, - self.type, - self.type)) - - for item in sample_ids_step: - query += """ - left join {}Data as T{} on T{}.Id = {}XRef.DataId - and T{}.StrainId={}\n - """.format(*mescape(self.type, item, item, self.type, item, item)) - - if self.type == "Publish": - query += """ - WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId - and {}Freeze.Name = '{}' - and {}.Id = {}XRef.{}Id - order by {}.Id - """.format(*mescape(self.type, self.type, self.type, self.name, - dataset_type, self.type, dataset_type, dataset_type)) - else: - query += """ - WHERE {}XRef.{}FreezeId = {}Freeze.Id - and {}Freeze.Name = '{}' - and {}.Id = {}XRef.{}Id - order by {}.Id - """.format(*mescape(self.type, self.type, self.type, self.type, - self.name, dataset_type, self.type, self.type, dataset_type)) - cursor.execute(query) - results = cursor.fetchall() - trait_sample_data.append([list(result) for result in results]) - - trait_count = len(trait_sample_data[0]) - self.trait_data = collections.defaultdict(list) - - 
data_start_pos = 1 - for trait_counter in range(trait_count): - trait_name = trait_sample_data[0][trait_counter][0] - for chunk_counter in range(int(number_chunks)): - self.trait_data[trait_name] += ( - trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) - - cache_dataset_results( - self.name, self.type, self.samplelist, self.trait_data) - else: - self.trait_data = cached_results diff --git a/wqflask/base/data_set/datasetgroup.py b/wqflask/base/data_set/datasetgroup.py deleted file mode 100644 index 95dc976f..00000000 --- a/wqflask/base/data_set/datasetgroup.py +++ /dev/null @@ -1,195 +0,0 @@ -"Dataset Group class ..." - -import os -import json - - -from base import webqtlConfig -from .markers import Markers, HumanMarkers -from utility import webqtlUtil -from utility import gen_geno_ob -from db import webqtlDatabaseFunction -from maintenance import get_group_samplelists -from wqflask.database import database_connection -from utility.tools import ( - locate, - USE_REDIS, - flat_files, - get_setting, - flat_file_exists, - locate_ignore_error) - -class DatasetGroup: - """ - Each group has multiple datasets; each species has multiple groups. - - For example, Mouse has multiple groups (BXD, BXA, etc), and each group - has multiple datasets associated with it. 
- - """ - - def __init__(self, dataset, name=None): - """This sets self.group and self.group_id""" - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - if not name: - cursor.execute(dataset.query_for_group, - (dataset.name,)) - else: - cursor.execute( - "SELECT InbredSet.Name, " - "InbredSet.Id, " - "InbredSet.GeneticType, " - "InbredSet.InbredSetCode " - "FROM InbredSet WHERE Name = %s", - (name,)) - results = cursor.fetchone() - if results: - (self.name, self.id, self.genetic_type, self.code) = results - else: - self.name = name or dataset.name - if self.name == 'BXD300': - self.name = "BXD" - - self.f1list = None - self.parlist = None - self.get_f1_parent_strains() - - self.mapping_id, self.mapping_names = self.get_mapping_methods() - - self.species = webqtlDatabaseFunction.retrieve_species(self.name) - - self.incparentsf1 = False - self.allsamples = None - self._datasets = None - self.genofile = None - - def get_mapping_methods(self): - mapping_id = () - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT MappingMethodId FROM " - "InbredSet WHERE Name= %s", - (self.name,)) - results = cursor.fetchone() - if results and results[0]: - mapping_id = results[0] - if mapping_id == "1": - mapping_names = ["GEMMA", "QTLReaper", "R/qtl"] - elif mapping_id == "2": - mapping_names = ["GEMMA"] - elif mapping_id == "3": - mapping_names = ["R/qtl"] - elif mapping_id == "4": - mapping_names = ["GEMMA", "PLINK"] - else: - mapping_names = [] - - return mapping_id, mapping_names - - def get_markers(self): - def check_plink_gemma(): - if flat_file_exists("mapping"): - MAPPING_PATH = flat_files("mapping") + "/" - if os.path.isfile(MAPPING_PATH + self.name + ".bed"): - return True - return False - - if check_plink_gemma(): - marker_class = HumanMarkers - else: - marker_class = Markers - - if self.genofile: - self.markers = marker_class(self.genofile[:-5]) - else: - self.markers = 
marker_class(self.name) - - def get_f1_parent_strains(self): - try: - # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py; - f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name] - except KeyError: - f1 = f12 = maternal = paternal = None - - if f1 and f12: - self.f1list = [f1, f12] - if maternal and paternal: - self.parlist = [maternal, paternal] - - def get_study_samplelists(self): - study_sample_file = locate_ignore_error( - self.name + ".json", 'study_sample_lists') - try: - f = open(study_sample_file) - except: - return [] - study_samples = json.load(f) - return study_samples - - def get_genofiles(self): - jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name) - try: - f = open(jsonfile) - except: - return None - jsondata = json.load(f) - return jsondata['genofile'] - - def get_samplelist(self, redis_conn): - result = None - key = "samplelist:v3:" + self.name - if USE_REDIS: - result = redis_conn.get(key) - - if result is not None: - self.samplelist = json.loads(result) - else: - genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype') - if genotype_fn: - self.samplelist = get_group_samplelists.get_samplelist( - "geno", genotype_fn) - else: - self.samplelist = None - - if USE_REDIS: - redis_conn.set(key, json.dumps(self.samplelist)) - redis_conn.expire(key, 60 * 5) - - def all_samples_ordered(self): - result = [] - lists = (self.parlist, self.f1list, self.samplelist) - [result.extend(l) for l in lists if l] - return result - - def read_genotype_file(self, use_reaper=False): - '''Read genotype from .geno file instead of database''' - # genotype_1 is Dataset Object without parents and f1 - # genotype_2 is Dataset Object with parents and f1 (not for intercross) - - # reaper barfs on unicode filenames, so here we ensure it's a string - if self.genofile: - if "RData" in self.genofile: # ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to 
other file types like RData - full_filename = str( - locate(self.genofile.split(".")[0] + ".geno", 'genotype')) - else: - full_filename = str(locate(self.genofile, 'genotype')) - else: - full_filename = str(locate(self.name + '.geno', 'genotype')) - genotype_1 = gen_geno_ob.genotype(full_filename) - - if genotype_1.type == "group" and self.parlist: - genotype_2 = genotype_1.add( - Mat=self.parlist[0], Pat=self.parlist[1]) # , F1=_f1) - else: - genotype_2 = genotype_1 - - # determine default genotype object - if self.incparentsf1 and genotype_1.type != "intercross": - genotype = genotype_2 - else: - self.incparentsf1 = 0 - genotype = genotype_1 - - self.samplelist = list(genotype.prgy) - - return genotype diff --git a/wqflask/base/data_set/datasettype.py b/wqflask/base/data_set/datasettype.py deleted file mode 100644 index 05f0f564..00000000 --- a/wqflask/base/data_set/datasettype.py +++ /dev/null @@ -1,117 +0,0 @@ -"DatasetType class ..." - -import json -import requests -from typing import Optional, Dict - - -from redis import Redis - - -from utility.tools import GN2_BASE_URL -from wqflask.database import database_connection - - -class DatasetType: - """Create a dictionary of samples where the value is set to Geno, - Publish or ProbeSet. E.g. - - {'AD-cases-controls-MyersGeno': 'Geno', - 'AD-cases-controls-MyersPublish': 'Publish', - 'AKXDGeno': 'Geno', - 'AXBXAGeno': 'Geno', - 'AXBXAPublish': 'Publish', - 'Aging-Brain-UCIPublish': 'Publish', - 'All Phenotypes': 'Publish', - 'B139_K_1206_M': 'ProbeSet', - 'B139_K_1206_R': 'ProbeSet' ... 
- } - """ - - def __init__(self, redis_conn): - "Initialise the object" - self.datasets = {} - self.data = {} - # self.redis_instance = redis_instance - data = redis_conn.get("dataset_structure") - if data: - self.datasets = json.loads(data) - else: - # ZS: I don't think this should ever run unless Redis is - # emptied - try: - data = json.loads(requests.get( - GN2_BASE_URL + "/api/v_pre1/gen_dropdown", - timeout=5).content) - for _species in data['datasets']: - for group in data['datasets'][_species]: - for dataset_type in data['datasets'][_species][group]: - for dataset in data['datasets'][_species][group][dataset_type]: - short_dataset_name = dataset[1] - if dataset_type == "Phenotypes": - new_type = "Publish" - elif dataset_type == "Genotypes": - new_type = "Geno" - else: - new_type = "ProbeSet" - self.datasets[short_dataset_name] = new_type - except Exception: # Do nothing - pass - - redis_conn.set("dataset_structure", json.dumps(self.datasets)) - self.data = data - - def set_dataset_key(self, t, name, redis_conn, db_cursor): - """If name is not in the object's dataset dictionary, set it, and - update dataset_structure in Redis - args: - t: Type of dataset structure which can be: 'mrna_expr', 'pheno', - 'other_pheno', 'geno' - name: The name of the key to inserted in the datasets dictionary - - """ - sql_query_mapping = { - 'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM " - "ProbeSetFreeze WHERE " - "ProbeSetFreeze.Name = %s "), - 'pheno': ("SELECT InfoFiles.GN_AccesionId " - "FROM InfoFiles, PublishFreeze, InbredSet " - "WHERE InbredSet.Name = %s AND " - "PublishFreeze.InbredSetId = InbredSet.Id AND " - "InfoFiles.InfoPageName = PublishFreeze.Name"), - 'other_pheno': ("SELECT PublishFreeze.Name " - "FROM PublishFreeze, InbredSet " - "WHERE InbredSet.Name = %s AND " - "PublishFreeze.InbredSetId = InbredSet.Id"), - 'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE " - "GenoFreeze.Name = %s ") - } - - dataset_name_mapping = { - "mrna_expr": "ProbeSet", - 
"pheno": "Publish", - "other_pheno": "Publish", - "geno": "Geno", - } - - group_name = name - if t in ['pheno', 'other_pheno']: - group_name = name.replace("Publish", "") - - db_cursor.execute(sql_query_mapping[t], (group_name,)) - if db_cursor.fetchone(): - self.datasets[name] = dataset_name_mapping[t] - redis_conn.set( - "dataset_structure", json.dumps(self.datasets)) - return True - - - def __call__(self, name, redis_conn, db_cursor): - if name not in self.datasets: - for t in ["mrna_expr", "pheno", "other_pheno", "geno"]: - # This has side-effects, with the end result being a - # truth-y value - if(self.set_dataset_key(t, name, redis_conn, db_cursor)): - break - # Return None if name has not been set - return self.datasets.get(name, None) diff --git a/wqflask/base/data_set/genotypedataset.py b/wqflask/base/data_set/genotypedataset.py deleted file mode 100644 index b903cd72..00000000 --- a/wqflask/base/data_set/genotypedataset.py +++ /dev/null @@ -1,76 +0,0 @@ -"GenotypeDataSet class ..." 
- -from .dataset import DataSet -from utility import webqtlUtil -from utility.tools import get_setting -from db import webqtlDatabaseFunction -from .utils import geno_mrna_confidentiality -from wqflask.database import database_connection - -class GenotypeDataSet(DataSet): - - def setup(self): - # Fields in the database table - self.search_fields = ['Name', - 'Chr'] - - # Find out what display_fields is - self.display_fields = ['name', - 'chr', - 'mb', - 'source2', - 'sequence'] - - # Fields displayed in the search results table header - self.header_fields = ['Index', - 'ID', - 'Location'] - - # Todo: Obsolete or rename this field - self.type = 'Geno' - self.query_for_group = """ -SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType, InbredSet.InbredSetCode -FROM InbredSet, GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id AND -GenoFreeze.Name = %s""" - - def check_confidentiality(self): - return geno_mrna_confidentiality(self) - - def get_trait_info(self, trait_list, species=None): - for this_trait in trait_list: - if not this_trait.haveinfo: - this_trait.retrieveInfo() - - if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( - this_trait.chr, float(this_trait.mb)) - - def retrieve_sample_data(self, trait): - results = [] - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT Strain.Name, GenoData.value, " - "GenoSE.error, 'N/A', Strain.Name2 " - "FROM (GenoData, GenoFreeze, Strain, Geno, " - "GenoXRef) LEFT JOIN GenoSE ON " - "(GenoSE.DataId = GenoData.Id AND " - "GenoSE.StrainId = GenoData.StrainId) " - "WHERE Geno.SpeciesId = %s AND " - "Geno.Name = %s AND GenoXRef.GenoId = Geno.Id " - "AND GenoXRef.GenoFreezeId = GenoFreeze.Id " - "AND GenoFreeze.Name = %s AND " - "GenoXRef.DataId = GenoData.Id " - "AND GenoData.StrainId = Strain.Id " - "ORDER BY Strain.Name", - (webqtlDatabaseFunction.retrieve_species_id(self.group.name), - trait, self.name,)) - results = 
list(cursor.fetchall()) - - if self.group.name in webqtlUtil.ParInfo: - f1_1, f1_2, ref, nonref = webqtlUtil.ParInfo[self.group.name] - results.append([f1_1, 0, None, "N/A", f1_1]) - results.append([f1_2, 0, None, "N/A", f1_2]) - results.append([ref, -1, None, "N/A", ref]) - results.append([nonref, 1, None, "N/A", nonref]) - - return results diff --git a/wqflask/base/data_set/markers.py b/wqflask/base/data_set/markers.py deleted file mode 100644 index 6f56445e..00000000 --- a/wqflask/base/data_set/markers.py +++ /dev/null @@ -1,96 +0,0 @@ -"Base Class: Markers - " - -import math - -from utility.tools import locate, flat_files - -class Markers: - """Todo: Build in cacheing so it saves us reading the same file more than once""" - - def __init__(self, name): - json_data_fh = open(locate(name + ".json", 'genotype/json')) - - markers = [] - with open("%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name), 'r') as bimbam_fh: - if len(bimbam_fh.readline().split(", ")) > 2: - delimiter = ", " - elif len(bimbam_fh.readline().split(",")) > 2: - delimiter = "," - elif len(bimbam_fh.readline().split("\t")) > 2: - delimiter = "\t" - else: - delimiter = " " - for line in bimbam_fh: - marker = {} - marker['name'] = line.split(delimiter)[0].rstrip() - marker['Mb'] = float(line.split(delimiter)[ - 1].rstrip()) / 1000000 - marker['chr'] = line.split(delimiter)[2].rstrip() - markers.append(marker) - - for marker in markers: - if (marker['chr'] != "X") and (marker['chr'] != "Y") and (marker['chr'] != "M"): - marker['chr'] = int(marker['chr']) - marker['Mb'] = float(marker['Mb']) - - self.markers = markers - - def add_pvalues(self, p_values): - if isinstance(p_values, list): - # THIS IS only needed for the case when we are limiting the number of p-values calculated - # if len(self.markers) > len(p_values): - # self.markers = self.markers[:len(p_values)] - - for marker, p_value in zip(self.markers, p_values): - if not p_value: - continue - marker['p_value'] = float(p_value) - if 
math.isnan(marker['p_value']) or marker['p_value'] <= 0: - marker['lod_score'] = 0 - marker['lrs_value'] = 0 - else: - marker['lod_score'] = -math.log10(marker['p_value']) - # Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 - elif isinstance(p_values, dict): - filtered_markers = [] - for marker in self.markers: - if marker['name'] in p_values: - marker['p_value'] = p_values[marker['name']] - if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): - marker['lod_score'] = 0 - marker['lrs_value'] = 0 - else: - marker['lod_score'] = -math.log10(marker['p_value']) - # Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - marker['lrs_value'] = - \ - math.log10(marker['p_value']) * 4.61 - filtered_markers.append(marker) - self.markers = filtered_markers - - -class HumanMarkers(Markers): - "Markers for humans ..." - - def __init__(self, name, specified_markers=[]): - marker_data_fh = open(flat_files('mapping') + '/' + name + '.bim') - self.markers = [] - for line in marker_data_fh: - splat = line.strip().split() - if len(specified_markers) > 0: - if splat[1] in specified_markers: - marker = {} - marker['chr'] = int(splat[0]) - marker['name'] = splat[1] - marker['Mb'] = float(splat[3]) / 1000000 - else: - continue - else: - marker = {} - marker['chr'] = int(splat[0]) - marker['name'] = splat[1] - marker['Mb'] = float(splat[3]) / 1000000 - self.markers.append(marker) - - def add_pvalues(self, p_values): - super(HumanMarkers, self).add_pvalues(p_values) diff --git a/wqflask/base/data_set/mrnaassaydataset.py b/wqflask/base/data_set/mrnaassaydataset.py deleted file mode 100644 index 4eb998b5..00000000 --- a/wqflask/base/data_set/mrnaassaydataset.py +++ /dev/null @@ -1,179 +0,0 @@ -"MrnaAssayDataSet class ..." 
- -import codecs - - -from .dataset import DataSet -from .utils import geno_mrna_confidentiality -from wqflask.database import database_connection -from utility.tools import get_setting - -class MrnaAssayDataSet(DataSet): - ''' - An mRNA Assay is a quantitative assessment (assay) associated with an mRNA trait - - This used to be called ProbeSet, but that term only refers specifically to the Affymetrix - platform and is far too specific. - - ''' - - def setup(self): - # Fields in the database table - self.search_fields = ['Name', - 'Description', - 'Probe_Target_Description', - 'Symbol', - 'Alias', - 'GenbankId', - 'UniGeneId', - 'RefSeq_TranscriptId'] - - # Find out what display_fields is - self.display_fields = ['name', 'symbol', - 'description', 'probe_target_description', - 'chr', 'mb', - 'alias', 'geneid', - 'genbankid', 'unigeneid', - 'omim', 'refseq_transcriptid', - 'blatseq', 'targetseq', - 'chipid', 'comments', - 'strand_probe', 'strand_gene', - 'proteinid', 'uniprotid', - 'probe_set_target_region', - 'probe_set_specificity', - 'probe_set_blat_score', - 'probe_set_blat_mb_start', - 'probe_set_blat_mb_end', - 'probe_set_strand', - 'probe_set_note_by_rw', - 'flag'] - - # Fields displayed in the search results table header - self.header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] - - # Todo: Obsolete or rename this field - self.type = 'ProbeSet' - self.query_for_group = """ -SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType, InbredSet.InbredSetCode -FROM InbredSet, ProbeSetFreeze, ProbeFreeze WHERE ProbeFreeze.InbredSetId = InbredSet.Id AND -ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND ProbeSetFreeze.Name = %s""" - - def check_confidentiality(self): - return geno_mrna_confidentiality(self) - - def get_trait_info(self, trait_list=None, species=''): - - # Note: setting trait_list to [] is probably not a great idea. 
- if not trait_list: - trait_list = [] - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - for this_trait in trait_list: - - if not this_trait.haveinfo: - this_trait.retrieveInfo(QTL=1) - - if not this_trait.symbol: - this_trait.symbol = "N/A" - - # XZ, 12/08/2008: description - # XZ, 06/05/2009: Rob asked to add probe target description - description_string = str( - str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = str( - str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') - - if len(description_string) > 1 and description_string != 'None': - description_display = description_string - else: - description_display = this_trait.symbol - - if (len(description_display) > 1 and description_display != 'N/A' - and len(target_string) > 1 and target_string != 'None'): - description_display = description_display + '; ' + target_string.strip() - - # Save it for the jinja2 template - this_trait.description_display = description_display - - if this_trait.chr and this_trait.mb: - this_trait.location_repr = 'Chr%s: %.6f' % ( - this_trait.chr, float(this_trait.mb)) - - # Get mean expression value - cursor.execute( - "SELECT ProbeSetXRef.mean FROM " - "ProbeSetXRef, ProbeSet WHERE " - "ProbeSetXRef.ProbeSetFreezeId = %s " - "AND ProbeSet.Id = ProbeSetXRef.ProbeSetId " - "AND ProbeSet.Name = %s", - (str(this_trait.dataset.id), this_trait.name,) - ) - result = cursor.fetchone() - - mean = result[0] if result else 0 - - if mean: - this_trait.mean = "%2.3f" % mean - - # LRS and its location - this_trait.LRS_score_repr = 'N/A' - this_trait.LRS_location_repr = 'N/A' - - # Max LRS and its Locus location - if this_trait.lrs and this_trait.locus: - cursor.execute( - "SELECT Geno.Chr, Geno.Mb FROM " - "Geno, Species WHERE " - "Species.Name = %s AND " - "Geno.Name = %s AND " - "Geno.SpeciesId = Species.Id", - (species, this_trait.locus,) - ) - if result := cursor.fetchone(): - lrs_chr, lrs_mb = result - 
this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = 'Chr%s: %.6f' % ( - lrs_chr, float(lrs_mb)) - - return trait_list - - def retrieve_sample_data(self, trait): - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT Strain.Name, ProbeSetData.value, " - "ProbeSetSE.error, NStrain.count, " - "Strain.Name2 FROM (ProbeSetData, " - "ProbeSetFreeze, Strain, ProbeSet, " - "ProbeSetXRef) LEFT JOIN ProbeSetSE ON " - "(ProbeSetSE.DataId = ProbeSetData.Id AND " - "ProbeSetSE.StrainId = ProbeSetData.StrainId) " - "LEFT JOIN NStrain ON " - "(NStrain.DataId = ProbeSetData.Id AND " - "NStrain.StrainId = ProbeSetData.StrainId) " - "WHERE ProbeSet.Name = %s AND " - "ProbeSetXRef.ProbeSetId = ProbeSet.Id " - "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " - "AND ProbeSetFreeze.Name = %s AND " - "ProbeSetXRef.DataId = ProbeSetData.Id " - "AND ProbeSetData.StrainId = Strain.Id " - "ORDER BY Strain.Name", - (trait, self.name,) - ) - return cursor.fetchall() - - def retrieve_genes(self, column_name): - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - f"SELECT ProbeSet.Name, ProbeSet.{column_name} " - "FROM ProbeSet,ProbeSetXRef WHERE " - "ProbeSetXRef.ProbeSetFreezeId = %s " - "AND ProbeSetXRef.ProbeSetId=ProbeSet.Id", - (str(self.id),)) - return dict(cursor.fetchall()) diff --git a/wqflask/base/data_set/phenotypedataset.py b/wqflask/base/data_set/phenotypedataset.py deleted file mode 100644 index 5a39418a..00000000 --- a/wqflask/base/data_set/phenotypedataset.py +++ /dev/null @@ -1,134 +0,0 @@ -"PhenotypeDataSet class ..." 
- -from .dataset import DataSet -from base import webqtlConfig -from utility.tools import get_setting -from wqflask.database import database_connection - -class PhenotypeDataSet(DataSet): - - def setup(self): - # Fields in the database table - self.search_fields = ['Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'PublishXRef.mean', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors', - 'PublishXRef.Id'] - - # Figure out what display_fields is - self.display_fields = ['name', 'group_code', - 'pubmed_id', - 'pre_publication_description', - 'post_publication_description', - 'original_description', - 'pre_publication_abbreviation', - 'post_publication_abbreviation', - 'mean', - 'lab_code', - 'submitter', 'owner', - 'authorized_users', - 'authors', 'title', - 'abstract', 'journal', - 'volume', 'pages', - 'month', 'year', - 'sequence', 'units', 'comments'] - - # Fields displayed in the search results table header - self.header_fields = ['Index', - 'Record', - 'Description', - 'Authors', - 'Year', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] - - self.type = 'Publish' - self.query_for_group = """ -SELECT InbredSet.Name, InbredSet.Id, InbredSet.GeneticType, InbredSet.InbredSetCode FROM InbredSet, PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id AND PublishFreeze.Name = %s""" - - def check_confidentiality(self): - # (Urgently?) 
Need to write this - pass - - def get_trait_info(self, trait_list, species=''): - for this_trait in trait_list: - - if not this_trait.haveinfo: - this_trait.retrieve_info(get_qtl_info=True) - - description = this_trait.post_publication_description - - # If the dataset is confidential and the user has access to confidential - # phenotype traits, then display the pre-publication description instead - # of the post-publication description - if this_trait.confidential: - this_trait.description_display = "" - continue # for now, because no authorization features - - if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( - privilege=self.privilege, - userName=self.userName, - authorized_users=this_trait.authorized_users): - - description = this_trait.pre_publication_description - - if len(description) > 0: - this_trait.description_display = description.strip() - else: - this_trait.description_display = "" - - if not this_trait.year.isdigit(): - this_trait.pubmed_text = "N/A" - else: - this_trait.pubmed_text = this_trait.year - - if this_trait.pubmed_id: - this_trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % this_trait.pubmed_id - - # LRS and its location - this_trait.LRS_score_repr = "N/A" - this_trait.LRS_location_repr = "N/A" - - if this_trait.lrs: - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT Geno.Chr, Geno.Mb FROM " - "Geno, Species WHERE " - "Species.Name = %s AND " - "Geno.Name = %s AND " - "Geno.SpeciesId = Species.Id", - (species, this_trait.locus,) - ) - if result := cursor.fetchone(): - if result[0] and result[1]: - LRS_Chr, LRS_Mb = result[0], result[1] - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( - LRS_Chr, float(LRS_Mb)) - - def retrieve_sample_data(self, trait): - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT Strain.Name, 
PublishData.value, " - "PublishSE.error, NStrain.count, " - "Strain.Name2 FROM (PublishData, Strain, " - "PublishXRef, PublishFreeze) LEFT JOIN " - "PublishSE ON " - "(PublishSE.DataId = PublishData.Id " - "AND PublishSE.StrainId = PublishData.StrainId) " - "LEFT JOIN NStrain ON " - "(NStrain.DataId = PublishData.Id AND " - "NStrain.StrainId = PublishData.StrainId) " - "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishData.Id = PublishXRef.DataId AND " - "PublishXRef.Id = %s AND PublishFreeze.Id = %s " - "AND PublishData.StrainId = Strain.Id " - "ORDER BY Strain.Name", (trait, self.id)) - return cursor.fetchall() diff --git a/wqflask/base/data_set/probably_unused.py b/wqflask/base/data_set/probably_unused.py deleted file mode 100644 index abd3ad07..00000000 --- a/wqflask/base/data_set/probably_unused.py +++ /dev/null @@ -1,35 +0,0 @@ -"Functions that are probably unused in the code" - -import pickle as pickle - -from wqflask.database import database_connection -from utility.tools import get_setting - -def create_datasets_list(): - if USE_REDIS: - key = "all_datasets" - result = redis_conn.get(key) - - if result: - datasets = pickle.loads(result) - - if result is None: - datasets = list() - type_dict = {'Publish': 'PublishFreeze', - 'ProbeSet': 'ProbeSetFreeze', - 'Geno': 'GenoFreeze'} - - for dataset_type in type_dict: - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute("SELECT Name FROM %s", - (type_dict[dataset_type],)) - results = cursor.fetchall(query) - if results: - for result in results: - datasets.append( - create_dataset(result.Name, dataset_type)) - if USE_REDIS: - redis_conn.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL)) - redis_conn.expire(key, 60 * 60) - - return datasets diff --git a/wqflask/base/data_set/tempdataset.py b/wqflask/base/data_set/tempdataset.py deleted file mode 100644 index b1c26a3b..00000000 --- a/wqflask/base/data_set/tempdataset.py +++ /dev/null @@ 
-1,23 +0,0 @@ -"TempDataSet class ..." - -from .dataset import DataSet - -class TempDataSet(DataSet): - """Temporary user-generated data set""" - - def setup(self): - self.search_fields = ['name', - 'description'] - - self.display_fields = ['name', - 'description'] - - self.header_fields = ['Name', - 'Description'] - - self.type = 'Temp' - - # Need to double check later how these are used - self.id = 1 - self.fullname = 'Temporary Storage' - self.shortname = 'Temp' diff --git a/wqflask/base/data_set/utils.py b/wqflask/base/data_set/utils.py deleted file mode 100644 index d18180c4..00000000 --- a/wqflask/base/data_set/utils.py +++ /dev/null @@ -1,80 +0,0 @@ -"data_set package utilities" - -import datetime -import os -import json -import hashlib -from typing import List - - -from utility.tools import get_setting, SQL_URI -from base.webqtlConfig import TMPDIR -from wqflask.database import parse_db_url, database_connection - -def geno_mrna_confidentiality(ob): - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - cursor.execute( - "SELECT confidentiality, " - f"AuthorisedUsers FROM {ob.type}Freeze WHERE Name = %s", - (ob.name,) - ) - result = cursor.fetchall() - if len(result) > 0 and result[0]: - return True - -def query_table_timestamp(dataset_type: str): - """function to query the update timestamp of a given dataset_type""" - - # computation data and actions - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - fetch_db_name = parse_db_url(SQL_URI) - cursor.execute( - "SELECT UPDATE_TIME FROM " - "information_schema.tables " - f"WHERE TABLE_SCHEMA = '{fetch_db_name[3]}' " - f"AND TABLE_NAME = '{dataset_type}Data'") - date_time_obj = cursor.fetchone()[0] - if not date_time_obj: - date_time_obj = datetime.datetime.now() - return date_time_obj.strftime("%Y-%m-%d %H:%M:%S") - - -def generate_hash_file(dataset_name: str, dataset_type: str, dataset_timestamp: str, samplelist: str): - """given the trait_name 
generate a unique name for this""" - string_unicode = f"{dataset_name}{dataset_timestamp}{samplelist}".encode() - md5hash = hashlib.md5(string_unicode) - return md5hash.hexdigest() - - -def cache_dataset_results(dataset_name: str, dataset_type: str, samplelist: List, query_results: List): - """function to cache dataset query results to file - input dataset_name and type query_results(already processed in default dict format) - """ - # data computations actions - # store the file path on redis - - table_timestamp = query_table_timestamp(dataset_type) - samplelist_as_str = ",".join(samplelist) - - file_name = generate_hash_file(dataset_name, dataset_type, table_timestamp, samplelist_as_str) - file_path = os.path.join(TMPDIR, f"{file_name}.json") - - with open(file_path, "w") as file_handler: - json.dump(query_results, file_handler) - - -def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List): - """function to fetch the cached results""" - - table_timestamp = query_table_timestamp(dataset_type) - samplelist_as_str = ",".join(samplelist) - - file_name = generate_hash_file(dataset_name, dataset_type, table_timestamp, samplelist_as_str) - file_path = os.path.join(TMPDIR, f"{file_name}.json") - try: - with open(file_path, "r") as file_handler: - - return json.load(file_handler) - - except Exception: - pass diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py deleted file mode 100644 index a78182e3..00000000 --- a/wqflask/base/mrna_assay_tissue_data.py +++ /dev/null @@ -1,102 +0,0 @@ -import collections - -from utility import Bunch - - -class MrnaAssayTissueData: - - def __init__(self, conn, gene_symbols=None): - self.gene_symbols = gene_symbols - self.conn = conn - if self.gene_symbols is None: - self.gene_symbols = [] - - self.data = collections.defaultdict(Bunch) - results = () - # Note that inner join is necessary in this query to get - # distinct record in one symbol group with highest mean value - # Due 
to the limit size of TissueProbeSetFreezeId table in DB, - # performance of inner join is - # acceptable.MrnaAssayTissueData(gene_symbols=symbol_list) - with conn.cursor() as cursor: - if len(self.gene_symbols) == 0: - cursor.execute( - "SELECT t.Symbol, t.GeneId, t.DataId, " - "t.Chr, t.Mb, t.description, " - "t.Probe_Target_Description FROM (SELECT Symbol, " - "max(Mean) AS maxmean " - "FROM TissueProbeSetXRef WHERE " - "TissueProbeSetFreezeId=1 AND " - "Symbol != '' AND Symbol IS NOT " - "Null GROUP BY Symbol) " - "AS x INNER JOIN " - "TissueProbeSetXRef AS t ON " - "t.Symbol = x.Symbol " - "AND t.Mean = x.maxmean") - else: - cursor.execute( - "SELECT t.Symbol, t.GeneId, t.DataId, " - "t.Chr, t.Mb, t.description, " - "t.Probe_Target_Description FROM (SELECT Symbol, " - "max(Mean) AS maxmean " - "FROM TissueProbeSetXRef WHERE " - "TissueProbeSetFreezeId=1 AND " - "Symbol IN " - f"({', '.join(['%s'] * len(self.gene_symbols))}) " - "GROUP BY Symbol) AS x INNER JOIN " - "TissueProbeSetXRef AS t ON t.Symbol = x.Symbol " - "AND t.Mean = x.maxmean", - tuple(self.gene_symbols)) - results = list(cursor.fetchall()) - lower_symbols = {} - for gene_symbol in self.gene_symbols: - if gene_symbol is not None: - lower_symbols[gene_symbol.lower()] = True - - for result in results: - (symbol, gene_id, data_id, _chr, _mb, - descr, probeset_target_descr) = result - if symbol is not None and lower_symbols.get(symbol.lower()): - symbol = symbol.lower() - self.data[symbol].gene_id = gene_id - self.data[symbol].data_id = data_id - self.data[symbol].chr = _chr - self.data[symbol].mb = _mb - self.data[symbol].description = descr - (self.data[symbol] - .probe_target_description) = probeset_target_descr - - - def get_symbol_values_pairs(self): - """Get one dictionary whose key is gene symbol and value is - tissue expression data (list type). All keys are lower case. 
- - The output is a symbolValuepairDict (dictionary): one - dictionary of Symbol and Value Pair; key is symbol, value is - one list of expression values of one probeSet; - - """ - id_list = [self.data[symbol].data_id for symbol in self.data] - - symbol_values_dict = {} - - if len(id_list) > 0: - results = [] - with self.conn.cursor() as cursor: - - cursor.execute( - "SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value " - "FROM TissueProbeSetXRef, TissueProbeSetData" - f" WHERE TissueProbeSetData.Id IN ({', '.join(['%s'] * len(id_list))})" - " AND TissueProbeSetXRef.DataId = TissueProbeSetData.Id" - ,tuple(id_list)) - - results = cursor.fetchall() - for result in results: - (symbol, value) = result - if symbol.lower() not in symbol_values_dict: - symbol_values_dict[symbol.lower()] = [value] - else: - symbol_values_dict[symbol.lower()].append( - value) - return symbol_values_dict diff --git a/wqflask/base/species.py b/wqflask/base/species.py deleted file mode 100644 index 0844fada..00000000 --- a/wqflask/base/species.py +++ /dev/null @@ -1,59 +0,0 @@ -from dataclasses import dataclass -from typing import Optional, Union -from collections import OrderedDict - - -class TheSpecies: - """Data related to species.""" - - def __init__(self, dataset=None, species_name=None) -> None: - "Initialise the Species object" - self.dataset = dataset - self.name = self.species_name = species_name - self.chromosomes = Chromosomes(species=species_name, - dataset=dataset) - - -@dataclass -class IndChromosome: - """Data related to IndChromosome""" - name: str - length: int - - @property - def mb_length(self) -> Union[int, float]: - """Chromosome length in mega-bases""" - return self.length / 1000000 - - -@dataclass -class Chromosomes: - """Data related to a chromosome""" - - def __init__(self, dataset, species: Optional[str]) -> None: - "initialise the Chromosome object" - self.species = species - if species is None: - self.dataset = dataset - - def chromosomes(self, db_cursor) -> 
OrderedDict: - """Lazily fetch the chromosomes""" - chromosomes = OrderedDict() - if self.species is not None: - db_cursor.execute( - "SELECT Chr_Length.Name, Chr_Length.OrderId, Length " - "FROM Chr_Length, Species WHERE " - "Chr_Length.SpeciesId = Species.SpeciesId AND " - "Species.Name = %s " - "ORDER BY OrderId", (self.species.capitalize(),)) - else: - db_cursor.execute( - "SELECT Chr_Length.Name, Chr_Length.OrderId, " - "Length FROM Chr_Length, InbredSet WHERE " - "Chr_Length.SpeciesId = InbredSet.SpeciesId AND " - "InbredSet.Name = " - "%s ORDER BY OrderId", (self.dataset.group.name,)) - for name, _, length in db_cursor.fetchall(): - chromosomes[name] = IndChromosome( - name=name, length=length) - return chromosomes diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py deleted file mode 100644 index 103ff0c0..00000000 --- a/wqflask/base/trait.py +++ /dev/null @@ -1,613 +0,0 @@ -import requests -import simplejson as json -from wqflask import app - -import utility.hmac as hmac -from base import webqtlConfig -from base.webqtlCaseData import webqtlCaseData -from base.data_set import create_dataset -from utility.authentication_tools import check_resource_availability -from utility.tools import get_setting, GN2_BASE_URL -from utility.redis_tools import get_redis_conn, get_resource_id - -from flask import g, request, url_for - -from wqflask.database import database_connection - - -Redis = get_redis_conn() - - -def create_trait(**kw): - assert bool(kw.get('dataset')) != bool( - kw.get('dataset_name')), "Needs dataset ob. 
or name" - - assert bool(kw.get('name')), "Needs trait name" - - - if bool(kw.get('dataset')): - dataset = kw.get('dataset') - - - else: - if kw.get('dataset_name') != "Temp": - - - dataset = create_dataset(kw.get('dataset_name')) - else: - - dataset = create_dataset( - dataset_name="Temp", - dataset_type="Temp", - group_name= kw.get('name').split("_")[2]) - - - if dataset.type == 'Publish': - permissions = check_resource_availability( - dataset, g.user_session.user_id, kw.get('name')) - else: - permissions = check_resource_availability( - dataset, g.user_session.user_id) - - - if permissions['data'] != "no-access": - - the_trait = GeneralTrait(**dict(kw,dataset=dataset)) - if the_trait.dataset.type != "Temp": - the_trait = retrieve_trait_info( - the_trait, - the_trait.dataset, - get_qtl_info=kw.get('get_qtl_info')) - return the_trait - else: - return None - - -class GeneralTrait: - """ - Trait class defines a trait in webqtl, can be either Microarray, - Published phenotype, genotype, or user input trait - - """ - - def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): - # xor assertion - assert kw.get("dataset"), "Dataset obj is needed as a kwarg" - - # Trait ID, ProbeSet ID, Published ID, etc. 
- self.name = kw.get('name') - self.dataset = kw.get("dataset") - self.cellid = kw.get('cellid') - self.identification = kw.get('identification', 'un-named trait') - self.haveinfo = kw.get('haveinfo', False) - # Blat sequence, available for ProbeSet - self.sequence = kw.get('sequence') - self.data = kw.get('data', {}) - self.view = True - - # Sets defaults - self.locus = None - self.lrs = None - self.pvalue = None - self.mean = None - self.additive = None - self.num_overlap = None - self.strand_probe = None - self.symbol = None - self.abbreviation = None - self.display_name = self.name - - self.LRS_score_repr = "N/A" - self.LRS_location_repr = "N/A" - self.chr = self.mb = self.locus_chr = self.locus_mb = "" - - if kw.get('fullname'): - name2 = value.split("::") - if len(name2) == 2: - self.dataset, self.name = name2 - # self.cellid is set to None above - elif len(name2) == 3: - self.dataset, self.name, self.cellid = name2 - - # Todo: These two lines are necessary most of the time, but - # perhaps not all of the time So we could add a simple if - # statement to short-circuit this if necessary - if get_sample_info is not False: - self = retrieve_sample_data(self, self.dataset) - - def export_informative(self, include_variance=0): - """ - export informative sample - mostly used in qtl regression - - """ - samples = [] - vals = [] - the_vars = [] - sample_aliases = [] - for sample_name, sample_data in list(self.data.items()): - if sample_data.value is not None: - if not include_variance or sample_data.variance is not None: - samples.append(sample_name) - vals.append(sample_data.value) - the_vars.append(sample_data.variance) - sample_aliases.append(sample_data.name2) - return samples, vals, the_vars, sample_aliases - - @property - def description_fmt(self): - """Return a text formated description""" - if self.dataset.type == 'ProbeSet': - if self.description: - formatted = self.description - if self.probe_target_description: - formatted += "; " + 
self.probe_target_description - else: - formatted = "Not available" - elif self.dataset.type == 'Publish': - if self.confidential: - formatted = self.pre_publication_description - else: - formatted = self.post_publication_description - else: - formatted = "Not available" - if isinstance(formatted, bytes): - formatted = formatted.decode("utf-8") - return formatted - - @property - def alias_fmt(self): - """Return a text formatted alias""" - - alias = 'Not available' - if getattr(self, "alias", None): - alias = self.alias.replace(";", " ") - alias = ", ".join(alias.split()) - - return alias - - @property - def wikidata_alias_fmt(self): - """Return a text formatted alias""" - - alias = 'Not available' - if self.symbol: - human_response = requests.get( - GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.upper()) - mouse_response = requests.get( - GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.capitalize()) - other_response = requests.get( - GN2_BASE_URL + "gn3/gene/aliases/" + self.symbol.lower()) - - if human_response and mouse_response and other_response: - alias_list = json.loads(human_response.content) + json.loads( - mouse_response.content) + \ - json.loads(other_response.content) - - filtered_aliases = [] - seen = set() - for item in alias_list: - if item in seen: - continue - else: - filtered_aliases.append(item) - seen.add(item) - alias = "; ".join(filtered_aliases) - - return alias - - @property - def location_fmt(self): - """Return a text formatted location - - While we're at it we set self.location in case we need it - later (do we?) 
- - """ - - if self.chr == "Un": - return 'Not available' - - if self.chr and self.mb: - self.location = 'Chr %s @ %s Mb' % (self.chr, self.mb) - elif self.chr: - self.location = 'Chr %s @ Unknown position' % (self.chr) - else: - self.location = 'Not available' - - fmt = self.location - # XZ: deal with direction - if self.strand_probe == '+': - fmt += (' on the plus strand ') - elif self.strand_probe == '-': - fmt += (' on the minus strand ') - - return fmt - - -def retrieve_sample_data(trait, dataset, samplelist=None): - if samplelist is None: - samplelist = [] - - if dataset.type == "Temp": - results = Redis.get(trait.name).split() - else: - results = dataset.retrieve_sample_data(trait.name) - # Todo: is this necessary? If not remove - trait.data.clear() - - if results: - if dataset.type == "Temp": - all_samples_ordered = dataset.group.all_samples_ordered() - for i, item in enumerate(results): - try: - trait.data[all_samples_ordered[i]] = webqtlCaseData( - all_samples_ordered[i], float(item)) - except: - pass - else: - for item in results: - name, value, variance, num_cases, name2 = item - if not samplelist or (samplelist and name in samplelist): - # name, value, variance, num_cases) - trait.data[name] = webqtlCaseData(*item) - return trait - - -@app.route("/trait/get_sample_data") -def get_sample_data(): - params = request.args - trait = params['trait'] - dataset = params['dataset'] - - trait_ob = create_trait(name=trait, dataset_name=dataset) - if trait_ob: - trait_dict = {} - trait_dict['name'] = trait - trait_dict['db'] = dataset - trait_dict['type'] = trait_ob.dataset.type - trait_dict['group'] = trait_ob.dataset.group.name - trait_dict['tissue'] = trait_ob.dataset.tissue - trait_dict['species'] = trait_ob.dataset.group.species - trait_dict['url'] = url_for( - 'show_trait_page', trait_id=trait, dataset=dataset) - if trait_ob.dataset.type == "ProbeSet": - trait_dict['symbol'] = trait_ob.symbol - trait_dict['location'] = trait_ob.location_repr - 
trait_dict['description'] = trait_ob.description_display - elif trait_ob.dataset.type == "Publish": - trait_dict['description'] = trait_ob.description_display - if trait_ob.pubmed_id: - trait_dict['pubmed_link'] = trait_ob.pubmed_link - trait_dict['pubmed_text'] = trait_ob.pubmed_text - else: - trait_dict['location'] = trait_ob.location_repr - - return json.dumps([trait_dict, {key: value.value for - key, value in list( - trait_ob.data.items())}]) - else: - return None - - -def jsonable(trait, dataset=None): - """Return a dict suitable for using as json - - Actual turning into json doesn't happen here though""" - - if not dataset: - dataset = create_dataset(dataset_name=trait.dataset.name, - dataset_type=trait.dataset.type, - group_name=trait.dataset.group.name) - - - trait_symbol = "N/A" - trait_mean = "N/A" - if trait.symbol: - trait_symbol = trait.symbol - if trait.mean: - trait_mean = trait.mean - - if dataset.type == "ProbeSet": - return dict(name=trait.name, - display_name=trait.display_name, - hmac=hmac.data_hmac('{}:{}'.format(trait.display_name, dataset.name)), - view=str(trait.view), - symbol=trait_symbol, - dataset=dataset.name, - dataset_name=dataset.shortname, - description=trait.description_display, - mean=trait_mean, - location=trait.location_repr, - chr=trait.chr, - mb=trait.mb, - lrs_score=trait.LRS_score_repr, - lrs_location=trait.LRS_location_repr, - lrs_chr=trait.locus_chr, - lrs_mb=trait.locus_mb, - additive=trait.additive - ) - elif dataset.type == "Publish": - if trait.pubmed_id: - return dict(name=trait.name, - display_name=trait.display_name, - hmac=hmac.data_hmac('{}:{}'.format(trait.name, dataset.name)), - view=str(trait.view), - symbol=trait.abbreviation, - dataset=dataset.name, - dataset_name=dataset.shortname, - description=trait.description_display, - abbreviation=trait.abbreviation, - authors=trait.authors, - pubmed_id=trait.pubmed_id, - pubmed_text=trait.pubmed_text, - pubmed_link=trait.pubmed_link, - mean=trait_mean, - 
lrs_score=trait.LRS_score_repr, - lrs_location=trait.LRS_location_repr, - lrs_chr=trait.locus_chr, - lrs_mb=trait.locus_mb, - additive=trait.additive - ) - else: - return dict(name=trait.name, - display_name=trait.display_name, - hmac=hmac.data_hmac('{}:{}'.format(trait.name, dataset.name)), - view=str(trait.view), - symbol=trait.abbreviation, - dataset=dataset.name, - dataset_name=dataset.shortname, - description=trait.description_display, - abbreviation=trait.abbreviation, - authors=trait.authors, - pubmed_text=trait.pubmed_text, - mean=trait_mean, - lrs_score=trait.LRS_score_repr, - lrs_location=trait.LRS_location_repr, - lrs_chr=trait.locus_chr, - lrs_mb=trait.locus_mb, - additive=trait.additive - ) - elif dataset.type == "Geno": - return dict(name=trait.name, - display_name=trait.display_name, - hmac=hmac.data_hmac('{}:{}'.format(trait.display_name, dataset.name)), - view=str(trait.view), - dataset=dataset.name, - dataset_name=dataset.shortname, - location=trait.location_repr, - chr=trait.chr, - mb=trait.mb - ) - elif dataset.name == "Temp": - return dict(name=trait.name, - display_name=trait.display_name, - hmac=hmac.data_hmac('{}:{}'.format(trait.display_name, dataset.name)), - view=str(trait.view), - dataset="Temp", - dataset_name="Temp") - else: - return dict() - - -def retrieve_trait_info(trait, dataset, get_qtl_info=False): - if not dataset: - raise ValueError("Dataset doesn't exist") - - with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: - trait_info = () - if dataset.type == 'Publish': - cursor.execute( - "SELECT PublishXRef.Id, InbredSet.InbredSetCode, " - "Publication.PubMed_ID, " - "CAST(Phenotype.Pre_publication_description AS BINARY), " - "CAST(Phenotype.Post_publication_description AS BINARY), " - "CAST(Phenotype.Original_description AS BINARY), " - "CAST(Phenotype.Pre_publication_abbreviation AS BINARY), " - "CAST(Phenotype.Post_publication_abbreviation AS BINARY), " - "PublishXRef.mean, Phenotype.Lab_code, " - 
"Phenotype.Submitter, Phenotype.Owner, " - "Phenotype.Authorized_Users, " - "CAST(Publication.Authors AS BINARY), " - "CAST(Publication.Title AS BINARY), " - "CAST(Publication.Abstract AS BINARY), " - "CAST(Publication.Journal AS BINARY), " - "Publication.Volume, Publication.Pages, " - "Publication.Month, Publication.Year, " - "PublishXRef.Sequence, Phenotype.Units, " - "PublishXRef.comments FROM PublishXRef, Publication, " - "Phenotype, PublishFreeze, InbredSet WHERE " - "PublishXRef.Id = %s AND " - "Phenotype.Id = PublishXRef.PhenotypeId " - "AND Publication.Id = PublishXRef.PublicationId " - "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishXRef.InbredSetId = InbredSet.Id AND " - "PublishFreeze.Id = %s", - (trait.name, dataset.id,) - ) - trait_info = cursor.fetchone() - - # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name - # XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. - elif dataset.type == 'ProbeSet': - display_fields_string = ', ProbeSet.'.join(dataset.display_fields) - display_fields_string = f'ProbeSet.{display_fields_string}' - cursor.execute( - f"SELECT {display_fields_string} FROM ProbeSet, ProbeSetFreeze, " - "ProbeSetXRef WHERE " - "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " - "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " - "ProbeSetFreeze.Name = %s AND " - "ProbeSet.Name = %s", - (dataset.name, str(trait.name),) - ) - trait_info = cursor.fetchone() - # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name - # to avoid the problem of same marker name from different species. 
- elif dataset.type == 'Geno': - display_fields_string = ',Geno.'.join(dataset.display_fields) - display_fields_string = f'Geno.{display_fields_string}' - cursor.execute( - f"SELECT {display_fields_string} FROM Geno, GenoFreeze, " - "GenoXRef WHERE " - "GenoXRef.GenoFreezeId = GenoFreeze.Id " - "AND GenoXRef.GenoId = Geno.Id " - "AND GenoFreeze.Name = %s " - "AND Geno.Name = %s", - (dataset.name, trait.name) - ) - trait_info = cursor.fetchone() - else: # Temp type - cursor.execute( - f"SELECT {','.join(dataset.display_fields)} " - f"FROM {dataset.type} WHERE Name = %s", - (trait.name,) - ) - trait_info = cursor.fetchone() - - if trait_info: - trait.haveinfo = True - for i, field in enumerate(dataset.display_fields): - holder = trait_info[i] - if isinstance(holder, bytes): - holder = holder.decode("utf-8", errors="ignore") - setattr(trait, field, holder) - - if dataset.type == 'Publish': - if trait.group_code: - trait.display_name = trait.group_code + "_" + str(trait.name) - - trait.confidential = 0 - if trait.pre_publication_description and not trait.pubmed_id: - trait.confidential = 1 - - description = trait.post_publication_description - - # If the dataset is confidential and the user has access to confidential - # phenotype traits, then display the pre-publication description instead - # of the post-publication description - trait.description_display = "N/A" - trait.abbreviation = "N/A" - if not trait.pubmed_id: - if trait.pre_publication_abbreviation: - trait.abbreviation = trait.pre_publication_abbreviation - if trait.pre_publication_description: - trait.description_display = trait.pre_publication_description - else: - if trait.post_publication_abbreviation: - trait.abbreviation = trait.post_publication_abbreviation - if description: - trait.description_display = description.strip() - - if not trait.year.isdigit(): - trait.pubmed_text = "N/A" - else: - trait.pubmed_text = trait.year - - if trait.pubmed_id: - trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % 
trait.pubmed_id - - if dataset.type == 'ProbeSet' and dataset.group: - description_string = trait.description - target_string = trait.probe_target_description - - if str(description_string or "") != "" and description_string != 'None': - description_display = description_string - else: - description_display = trait.symbol - - if (str(description_display or "") != "" - and description_display != 'N/A' - and str(target_string or "") != "" and target_string != 'None'): - description_display = description_display + '; ' + target_string.strip() - - # Save it for the jinja2 template - trait.description_display = description_display - - trait.location_repr = 'N/A' - if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % ( - trait.chr, float(trait.mb)) - - elif dataset.type == "Geno": - trait.location_repr = 'N/A' - if trait.chr and trait.mb: - trait.location_repr = 'Chr%s: %.6f' % ( - trait.chr, float(trait.mb)) - - if get_qtl_info: - # LRS and its location - trait.LRS_score_repr = "N/A" - trait.LRS_location_repr = "N/A" - trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = "" - if dataset.type == 'ProbeSet' and not trait.cellid: - trait.mean = "" - cursor.execute( - "SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, " - "ProbeSetXRef.pValue, ProbeSetXRef.mean, " - "ProbeSetXRef.additive FROM ProbeSetXRef, " - "ProbeSet WHERE " - "ProbeSetXRef.ProbeSetId = ProbeSet.Id " - "AND ProbeSet.Name = %s AND " - "ProbeSetXRef.ProbeSetFreezeId = %s", - (trait.name, dataset.id,) - ) - trait_qtl = cursor.fetchone() - if any(trait_qtl): - trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl - if trait.locus: - cursor.execute( - "SELECT Geno.Chr, Geno.Mb FROM " - "Geno, Species WHERE " - "Species.Name = %s AND " - "Geno.Name = %s AND " - "Geno.SpeciesId = Species.Id", - (dataset.group.species, trait.locus,) - ) - if result := cursor.fetchone() : - trait.locus_chr = result[0] - trait.locus_mb = result[1] - else: - 
trait.locus_chr = trait.locus_mb = "" - else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - - if dataset.type == 'Publish': - cursor.execute( - "SELECT PublishXRef.Locus, PublishXRef.LRS, " - "PublishXRef.additive FROM " - "PublishXRef, PublishFreeze WHERE " - "PublishXRef.Id = %s AND " - "PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishFreeze.Id = %s", (trait.name, dataset.id,) - ) - if trait_qtl := cursor.fetchone(): - trait.locus, trait.lrs, trait.additive = trait_qtl - if trait.locus: - cursor.execute( - "SELECT Geno.Chr, Geno.Mb FROM Geno, " - "Species WHERE Species.Name = %s " - "AND Geno.Name = %s AND " - "Geno.SpeciesId = Species.Id", - (dataset.group.species, trait.locus,) - ) - if result := cursor.fetchone(): - trait.locus_chr = result[0] - trait.locus_mb = result[1] - else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - else: - trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = "" - else: - trait.locus = trait.lrs = trait.additive = "" - if (dataset.type == 'Publish' or dataset.type == "ProbeSet"): - if str(trait.locus_chr or "") != "" and str(trait.locus_mb or "") != "": - trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % ( - trait.locus_chr, float(trait.locus_mb)) - if str(trait.lrs or "") != "": - trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs - else: - raise KeyError( - f"{repr(trait.name)} information is not found in the database " - f"for dataset '{dataset.name}' with id '{dataset.id}'.") - return trait diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py deleted file mode 100644 index dd6fad04..00000000 --- a/wqflask/base/webqtlCaseData.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. 
class webqtlCaseData:
    """One case (sample) data point of one trait.

    Holds the sample name, its trait value, variance and number of cases,
    plus a few template helpers that render missing values as ``"x"``.
    """

    def __init__(self, name, value=None, variance=None, num_cases=None, name2=None):
        self.name = name
        # Other name (for traits like BXD65a)
        self.name2 = name2
        self.value = value  # Trait Value
        self.variance = variance  # Trait Variance
        self.num_cases = num_cases  # Number of individuals/cases
        self.extra_attributes = None
        # Set a sane default (can't be just "id" cause that's a reserved word)
        self.this_id = None
        self.outlier = None  # Not set to True/False until later

    def __repr__(self):
        """Return a debug string listing only the fields that are set."""
        case_data_string = "<webqtlCaseData> "
        if self.value is not None:
            case_data_string += "value=%2.3f" % self.value
        if self.variance is not None:
            case_data_string += " variance=%2.3f" % self.variance
        # Compare against None (as display_num_cases does) so that a sample
        # with 0 cases is still reported rather than silently dropped.
        if self.num_cases is not None:
            case_data_string += " ndata=%s" % self.num_cases
        if self.name:
            case_data_string += " name=%s" % self.name
        if self.name2:
            case_data_string += " name2=%s" % self.name2
        return case_data_string

    @property
    def class_outlier(self):
        """Template helper: ``"outlier"`` when flagged as an outlier, else ``""``."""
        if self.outlier:
            return "outlier"
        return ""

    @property
    def display_value(self):
        """Value formatted to three decimals, or ``"x"`` when missing."""
        if self.value is not None:
            return "%2.3f" % self.value
        return "x"

    @property
    def display_variance(self):
        """Variance formatted to three decimals, or ``"x"`` when missing."""
        if self.variance is not None:
            return "%2.3f" % self.variance
        return "x"

    @property
    def display_num_cases(self):
        """Number of cases as a string, or ``"x"`` when missing."""
        if self.num_cases is not None:
            return "%s" % self.num_cases
        return "x"
# EXTERNAL LINK ADDRESSES (continued)
# Each value is a %-format template; the "%s" placeholders are filled in by
# the code that builds the link.
UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
GENOMEBROWSER_URL = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s"
NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s"
GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"
HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/homologene/?term=%s"
GENOTATION_URL = "http://www.genotation.org/Getd2g.pl?gene_list=%s"
GTEX_URL = "https://www.gtexportal.org/home/gene/%s"
GENEBRIDGE_URL = "https://www.systems-genetics.org/modules_by_gene/%s?organism=%s"
GENEMANIA_URL = "https://genemania.org/search/%s/%s"
UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s"
BIOGPS_URL = "http://biogps.org/?org=%s#goto=genereport&id=%s"
STRING_URL = "http://string-db.org/newstring_cgi/show_network_section.pl?identifier=%s"
PANTHER_URL = "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue=%s"
GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s"
ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s"
EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s"
WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s"
ENSEMBLETRANSCRIPT_URL = "http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s"
DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s'
PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s"
OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s"
UNIPROT_URL = "https://www.uniprot.org/uniprot/%s"
RGD_URL = "https://rgd.mcw.edu/rgdweb/elasticResults.html?term=%s&category=Gene&species=%s"
# The query parameter is "&section="; an HTML-entity decoding of "&sect" had
# corrupted it to the "§" character.
PHENOGEN_URL = "https://phenogen.org/gene.jsp?speciesCB=Rn&auto=Y&geneTxt=%s&genomeVer=rn7&section=geneEQTL"
RRID_MOUSE_URL = "https://www.jax.org/strain/%s"
RRID_RAT_URL = "https://rgd.mcw.edu/rgdweb/report/strain/main.html?id=%s"

# Temporary storage (note that this TMPDIR can be set as an
# environment variable - use utility.tools.TEMPDIR when you
# want to reach this base dir)
assert_writable_dir(TEMPDIR)

TMPDIR = mk_dir(TEMPDIR + '/gn2/')
assert_writable_dir(TMPDIR)

CACHEDIR = mk_dir(TMPDIR + '/cache/')
# We can no longer write into the git tree:
# NOTE(review): unlike CACHEDIR, these two paths are joined without a leading
# '/', so they rely on TMPDIR keeping its trailing '/' -- presumably mk_dir
# returns its argument unchanged; confirm in utility.tools.
GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/')

# Make sure we have permissions to access these
assert_writable_dir(CACHEDIR)
assert_writable_dir(GENERATED_IMAGE_DIR)
assert_writable_dir(GENERATED_TEXT_DIR)

# Flat file directories
GENODIR = flat_files('genotype') + '/'
assert_dir(GENODIR)
# assert_dir(GENODIR+'bimbam') # for gemma

# JSON genotypes are OBSOLETE
JSON_GENODIR = flat_files('genotype/json') + '/'
if not valid_path(JSON_GENODIR):
    # fall back on old location (move the dir, FIXME)
    # NOTE(review): this fallback has no trailing '/', unlike the primary
    # value above -- verify that callers do not rely on the slash.
    JSON_GENODIR = flat_files('json')


TEXTDIR = os.path.join(os.environ.get(
    "GNSHARE", "/gnshare/gn/"), "web/ProbeSetFreeze_DataMatrix")
# Are we using the following...?
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
CGIDIR = '/webqtl/'  # XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'