From 8aeff9b91d078a40a50d13f6393a1f1dabf62aa4 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 18 Jan 2013 16:58:28 -0600 Subject: Renamed CorrelationPage.py to show_corr_results.py Worked with correlation code; got to the code that begins to do the actual correlations Created a function "get_dataset_and_trait" in the new file "helper_functions.py" because the code initializing the dataset and trait objects was repeated in multiple places --- wqflask/base/data_set.py | 1 + wqflask/base/trait.py | 42 +++++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 50ef8f57..7088913c 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -741,3 +741,4 @@ def geno_mrna_confidentiality(ob): if confidential: # Allow confidential data later NoConfindetialDataForYouTodaySorry + diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 241bf2ab..2af4bc24 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -314,27 +314,27 @@ class GeneralTrait: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. #XZ: The geneid values in database should be cleaned up. - try: - junk = float(self.geneid) - geneidIsNumber = 1 - except: - geneidIsNumber = 0 - - if geneidIsNumber: - query = """ - SELECT - HomologeneId - FROM - Homologene, Species, InbredSet - WHERE - Homologene.GeneId =%s AND - InbredSet.Name = '%s' AND - InbredSet.SpeciesId = Species.Id AND - Species.TaxonomyId = Homologene.TaxonomyId - """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) - result = g.db.execute(query).fetchone() - else: - result = None + #try: + # float(self.geneid) + # geneidIsNumber = True + #except ValueError: + # geneidIsNumber = False + + #if geneidIsNumber: + query = """ + SELECT + HomologeneId + FROM + Homologene, Species, InbredSet + WHERE + Homologene.GeneId =%s AND + InbredSet.Name = '%s' AND + InbredSet.SpeciesId = Species.Id AND + Species.TaxonomyId = Homologene.TaxonomyId + """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) + result = g.db.execute(query).fetchone() + #else: + # result = None if result: self.homologeneid = result[0] -- cgit v1.2.3 From 59ed965cff915b1a96c387d947af9f673512c627 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 25 Jan 2013 21:00:08 +0000 Subject: Fixed a few bugs while trying to get the code running on the new server --- wqflask/base/webqtlConfig.py | 4 ++-- wqflask/requirements.txt | 8 -------- wqflask/wqflask/correlation/correlationFunction.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 2 +- wqflask/wqflask/search_results.py | 6 ++++-- wqflask/wqflask/show_trait/SampleList.py | 2 +- 6 files changed, 9 insertions(+), 15 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 755595e0..d5f09b64 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -63,8 +63,8 @@ CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/' ChangableHtmlPath = GNROOT + 'web/' SITENAME = 'GN' -PORTADDR = "http://132.192.47.32" -BASEHREF = '' +PORTADDR = "http://50.16.251.170" +BASEHREF = '' INFOPAGEHREF = '/dbdoc/%s.html' GLOSSARYFILE = "/glossary.html" CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' diff --git a/wqflask/requirements.txt b/wqflask/requirements.txt index d775ff21..cd75ae9a 100644 --- a/wqflask/requirements.txt +++ b/wqflask/requirements.txt @@ -3,20 +3,12 @@ Jinja2==2.6 MySQL-python==1.2.3 Piddle==0.1-dev PyYAML==3.10 -Reaper==1.0 SQLAlchemy==0.7.9 Werkzeug==0.8.3 -ipython==0.13.1 logging-tree==1.1 -logilab-astng==0.24.1 -logilab-common==0.58.3 -mercurial==2.4.2 nose==1.2.1 -numarray==1.5.2 numpy==1.6.2 pp==1.6.3 -pylint==0.26.0 -readline==6.2.4.1 requests==0.13.3 rpy2==2.3.1 scipy==0.11.0 diff --git a/wqflask/wqflask/correlation/correlationFunction.py b/wqflask/wqflask/correlation/correlationFunction.py index 2c1870fe..7d4b58a9 100644 --- a/wqflask/wqflask/correlation/correlationFunction.py +++ b/wqflask/wqflask/correlation/correlationFunction.py @@ -31,7 +31,7 @@ import pp import string from utility import webqtlUtil -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from dbFunction import webqtlDatabaseFunction diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index b82f1c59..96298b37 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -46,7 +46,7 @@ import reaper from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from base import data_set from base.templatePage import templatePage from utility import webqtlUtil, helper_functions diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index d986a2e0..ae1cadd0 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function +from __future__ import absolute_import, print_function, division from wqflask import app @@ -16,6 +16,8 @@ import datetime from pprint import pformat as pf +from flask import Flask, g + # Instead of importing HT we're going to build a class below until we can eliminate it from htmlgen import HTMLgen2 as HT @@ -45,7 +47,7 @@ class SearchResultPage(templatePage): #logging_tree.printout() self.fd = fd templatePage.__init__(self, fd) - assert self.openMysql(), "Couldn't open MySQL" + #assert self.openMysql(), "Couldn't open MySQL" print("fd is:", pf(fd)) print("fd.dict is:", pf(fd['dataset'])) diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index d98a810a..1130fb60 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -4,7 +4,7 @@ from flask import Flask, g from base import webqtlCaseData from utility import webqtlUtil, Plot, Bunch -from base.webqtlTrait import GeneralTrait +from base.trait import GeneralTrait from pprint import pformat as pf -- cgit v1.2.3 From aa1cff2d2bae08dadb6e9f8be759c4f13a974e73 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 31 Jan 2013 20:41:51 +0000 Subject: Changed fd to kw in search_results.py, commented out some lines related to fd --- wqflask/base/data_set.py | 1 - wqflask/wqflask/search_results.py | 40 ++++++++++++++++----------------------- 2 files changed, 16 insertions(+), 25 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 7088913c..8ced1528 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -41,7 +41,6 @@ from pprint import pformat as pf DS_NAME_MAP = {} def create_dataset(dataset_name): - #cursor = db_conn.cursor() print("dataset_name:", dataset_name) query = """ diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index ae1cadd0..1be1185c 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -26,7 +26,6 @@ from utility.THCell import THCell from utility.TDCell import TDCell from base.data_set import create_dataset from base.trait import GeneralTrait -from base.templatePage import templatePage from wqflask import parser from wqflask import do_search from utility import webqtlUtil @@ -37,22 +36,18 @@ from utility import formatting #from base.JinjaPage import JinjaEnv, JinjaPage -class SearchResultPage(templatePage): +class SearchResultPage(): #maxReturn = 3000 - def __init__(self, fd): + def __init__(self, kw): print("initing SearchResultPage") #import logging_tree #logging_tree.printout() - self.fd = fd - templatePage.__init__(self, fd) + #self.fd = fd + #templatePage.__init__(self, fd) #assert self.openMysql(), "Couldn't open MySQL" - print("fd is:", pf(fd)) - print("fd.dict is:", pf(fd['dataset'])) - self.dataset = fd['dataset'] - # change back to self.dataset #if not self.dataset or self.dataset == 'spacer': # #Error, No dataset selected @@ -62,20 +57,17 @@ class SearchResultPage(templatePage): ########################################### # All Phenotypes is a special case we'll deal with later - if self.dataset == "All Phenotypes": - self.cursor.execute(""" - select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze, - InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = - PublishFreeze.InbredSetId""") - results = self.cursor.fetchall() - self.dataset = map(lambda x: DataSet(x[0], self.cursor), results) - self.dataset_groups = map(lambda x: x[1], results) - self.dataset_group_ids = map(lambda x: x[2], results) - else: - print("self.dataset is:", pf(self.dataset)) - # Replaces a string with an object - self.dataset = create_dataset(self.dataset) - print("self.dataset is now:", pf(self.dataset)) + #if kw['dataset'] == "All Phenotypes": + # self.cursor.execute(""" + # select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze, + # InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id = + # PublishFreeze.InbredSetId""") + # results = self.cursor.fetchall() + # self.dataset = map(lambda x: DataSet(x[0], self.cursor), results) + # self.dataset_groups = map(lambda x: x[1], results) + # self.dataset_group_ids = map(lambda x: x[2], results) + #else: + self.dataset = create_dataset(kw['dataset']) self.search() self.gen_search_result() @@ -110,7 +102,7 @@ class SearchResultPage(templatePage): def search(self): - self.search_terms = parser.parse(self.fd['search_terms']) + self.search_terms = parser.parse(self.kw['search_terms']) print("After parsing:", self.search_terms) self.results = [] -- cgit v1.2.3 From 9b0264bf13e994298de95a4e08198336b6c97a38 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 7 Feb 2013 17:58:34 -0600 Subject: Added code to marker_regression.py that creates the numpy arrays to pass to Nick's code and changed the prep_data.py code to operate on a list of phenotype values instead of a textfile with the values delimited --- wqflask/base/webqtlConfig.py | 3 +- wqflask/base/webqtlConfigLocal.py | 2 +- .../wqflask/marker_regression/marker_regression.py | 42 +++++++++--- wqflask/wqflask/my_pylmm/data/prep_data.py | 74 +++++++++++++--------- wqflask/wqflask/my_pylmm/example.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 21 +++++- wqflask/wqflask/show_trait/show_trait.py | 2 +- wqflask/wqflask/views.py | 6 +- 8 files changed, 103 insertions(+), 49 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index d5f09b64..d05fa6e0 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -55,8 +55,9 @@ HTMLPATH = GNROOT + 'web/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' -TMPDIR = '/tmp/' +TMPDIR = HTMLPATH + 'tmp/' GENODIR = HTMLPATH + 'genotypes/' +NEWGENODIR = HTMLPATH + 'new_genotypes/' GENO_ARCHIVE_DIR = GENODIR + 'archive/' TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/' CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/' diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py index 84686234..8e3e0bbe 100755 --- a/wqflask/base/webqtlConfigLocal.py +++ b/wqflask/base/webqtlConfigLocal.py @@ -12,7 +12,7 @@ DB_UPDNAME = 'db_webqtl_zas1024' DB_UPDUSER = 'webqtl' DB_UPDPASSWD = 'webqtl' -GNROOT = '/home/zas1024/gn/' +GNROOT = '/home/zas1024/gene/' ROOT_URL = 'http://alexandria.uthsc.edu:91/' PythonPath = '/usr/bin/python' PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 7cdc350f..92270eb2 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -15,6 +15,8 @@ import os import httplib import urllib +import numpy as np + from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil @@ -25,6 +27,8 @@ from utility import webqtlUtil, helper_functions from base import webqtlConfig from dbFunction import webqtlDatabaseFunction from base.GeneralObject import GeneralObject +from wqflask.my_pylmm.data import prep_data +from wqflask.my_pylmm.pyLMM import lmm import reaper import cPickle @@ -63,22 +67,24 @@ class MarkerRegression(object): self.samples = [] # Want only ones with values self.vals = [] - self.variances = [] + #self.variances = [] assert start_vars['display_all_lrs'] in ('True', 'False') self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False for sample in self.dataset.group.samplelist: value = start_vars['value:' + sample] - variance = start_vars['variance:' + sample] - if variance.strip().lower() == 'x': - variance = 0 - else: - variance = float(variance) - if value.strip().lower() != 'x': - self.samples.append(str(sample)) - self.vals.append(float(value)) - self.variances.append(variance) + #variance = start_vars['variance:' + sample] + #if variance.strip().lower() == 'x': + # variance = 0 + #else: + # variance = float(variance) + #if value.strip().lower() != 'x': + self.samples.append(str(sample)) + self.vals.append(value) + #self.variances.append(variance) + + #self.initializeParameters(start_vars) @@ -447,6 +453,22 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" + prep_data.PrepData(self.vals, self.dataset.group.name) + + pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) + genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, + self.dataset.group.name + '.snps.new')) + + print("genotypes is:", pf(genotypes)) + + kinship_matrix = lmm.calculateKinship(genotypes) + print("kinship_matrix is:", pf(kinship_matrix)) + print("pheno_vector is:", pf(pheno_vector)) + + lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) + lmm_ob.fit() + + #calculate QTL for each trait self.qtl_results = self.genotype.regression(strains = self.samples, trait = self.vals) diff --git a/wqflask/wqflask/my_pylmm/data/prep_data.py b/wqflask/wqflask/my_pylmm/data/prep_data.py index b7a133c2..ef42a297 100644 --- a/wqflask/wqflask/my_pylmm/data/prep_data.py +++ b/wqflask/wqflask/my_pylmm/data/prep_data.py @@ -1,27 +1,29 @@ #!/usr/bin/python from __future__ import absolute_import, print_function, division +import os + import numpy - +from base import webqtlConfig + + class PrepData(object): - def __init__(self, exprs_file, snps_file): - self.exprs_file = exprs_file - self.snps_file = snps_file - self.empty_columns = set() + def __init__(self, pheno_vector, group_name): + self.pheno_vector = pheno_vector + self.group_name = group_name + self.no_val_samples = set() #self.identify_no_genotype_samples() self.identify_empty_samples() self.trim_files() def identify_empty_samples(self): - with open(self.exprs_file) as fh: - for line in fh: - for pos, item in enumerate(line.split()): - if item == "NA": - self.empty_columns.add(pos) - #print("self.empty_columns:", self.empty_columns) - nums = set(range(0, 176)) - print("not included:", nums-self.empty_columns) + for sample_count, val in enumerate(self.pheno_vector): + if val == "x": + self.no_val_samples.add(sample_count) + print("self.no_val_samples:", self.no_val_samples) + #nums = set(range(0, 176)) + #print("not included:", nums-self.empty_columns) #def identify_no_genotype_samples(self): # #for this_file in (self.exprs_file, self.snps_file): @@ -43,22 +45,36 @@ class PrepData(object): # print(no_geno_samples) def trim_files(self): - for this_file in (self.exprs_file, self.snps_file): - input_file = open(this_file) - this_file_name_output = this_file + ".new" - with open(this_file_name_output, "w") as output: - for line in input_file: - data_wanted = [] - for pos, item in enumerate(line.split()): - if pos in self.empty_columns: - continue - else: - data_wanted.append("%2s" % (item)) - #print("data_wanted is", data_wanted) - output.write(" ".join(data_wanted) + "\n") - print("Done writing file:", this_file_name_output) + input_file = open(os.path.join(webqtlConfig.NEWGENODIR, self.group_name+'.snps')) + output_file = os.path.join(webqtlConfig.TMPDIR, self.group_name + '.snps.new') + with open(output_file, "w") as output_file: + for line in input_file: + data_to_write = [] + for pos, item in enumerate(line.split()): + if pos in self.no_val_samples: + continue + else: + data_to_write.append("%s" % (item)) + output_file.write(" ".join(data_to_write) + "\n") + + print("Done writing:", output_file) + + #for this_file in (self.exprs_file, self.genotype_file): + # input_file = open(this_file) + # this_file_name_output = this_file + ".new" + # with open(this_file_name_output, "w") as output_file: + # for line in input_file: + # data_wanted = [] + # for pos, item in enumerate(line.split()): + # if pos in self.empty_columns: + # continue + # else: + # data_wanted.append("%2s" % (item)) + # #print("data_wanted is", data_wanted) + # output_file.write(" ".join(data_wanted) + "\n") + # print("Done writing file:", this_file_name_output) if __name__=="__main__": exprs_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.exprs.1""" - snps_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" - PrepData(exprs_file, snps_file) \ No newline at end of file + genotype_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" + PrepData(pheno_vector, genotype_file) \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py index 0348d67b..8b30debd 100644 --- a/wqflask/wqflask/my_pylmm/example.py +++ b/wqflask/wqflask/my_pylmm/example.py @@ -20,7 +20,7 @@ print("exprs is:", pf(Y.shape)) # These three lines will load all SNPs (from npdump or from txt) and # calculate the kinship -snps = np.genfromtxt('data/mdp.snps.1000.new').T +snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T print("snps is:", pf(snps.shape)) #snps = snps[~np.isnan(snps).all(axis=1)] #print ("snps is now:", pf(snps)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 7fe599c4..1ae663d4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -142,20 +142,35 @@ class LMM: is not done consistently. """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None): - + def __init__(self, Y, K, Kva=None, Kve=None, X0=None): """ The constructor takes a phenotype vector or array of size n. It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. If they are not provided, the constructor will calculate them. X0 is an optional covariate matrix of size n x q, where there are q covariates. When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if Kva is None: + Kva = [] + if Kve is None: + Kve = [] + + + if X0 == None: + X0 = np.ones(len(Y)).reshape(len(Y),1) + print("Y is:", pf(Y)) + + for key, value in locals().iteritems(): + print(" %s - %s" % (key, type(value))) + x = Y != -9 + print("x is:", pf(x)) if not x.sum() == len(Y): + print("x.sum is:", pf(x.sum())) + print("len(Y) is:", pf(len(Y))) sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) Y = Y[x] K = K[x,:][:,x] diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 603c40f5..33ea6e86 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -130,7 +130,7 @@ class ShowTrait(object): js_data = dict(sample_group_types = self.sample_group_types, sample_lists = sample_lists, attribute_names = self.sample_groups[0].attributes) - print("js_data:", pf(js_data)) + #print("js_data:", pf(js_data)) self.js_data = js_data diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 472548f0..81777742 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -136,15 +136,15 @@ def show_trait_page(): #fd = webqtlFormData.webqtlFormData(request.args) #print("stp y1:", pf(vars(fd))) template_vars = show_trait.ShowTrait(request.args) - print("js_data before dump:", template_vars.js_data) + #print("js_data before dump:", template_vars.js_data) template_vars.js_data = json.dumps(template_vars.js_data, default=json_default_handler, indent=" ") # Sorting the keys messes up the ordered dictionary, so don't do that #sort_keys=True) - print("js_data after dump:", template_vars.js_data) - print("show_trait template_vars:", pf(template_vars.__dict__)) + #print("js_data after dump:", template_vars.js_data) + #print("show_trait template_vars:", pf(template_vars.__dict__)) return render_template("show_trait.html", **template_vars.__dict__) @app.route("/marker_regression", methods=('POST',)) -- cgit v1.2.3 From b3853925653cf6145d7fb56b71edfc824a2d051a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 12 Feb 2013 16:20:56 -0600 Subject: Edited marker_regression.py and data_set.py to store the p-values and their corresponding markers to be used in the table of qtl results and other figures --- wqflask/base/data_set.py | 29 +++++++- .../wqflask/marker_regression/marker_regression.py | 80 +++++++++++++--------- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 15 +++- 3 files changed, 89 insertions(+), 35 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 8ced1528..182e15e6 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -23,6 +23,8 @@ from __future__ import absolute_import, print_function, division import os +import json + from flask import Flask, g from htmlgen import HTMLgen2 as HT @@ -64,6 +66,21 @@ def create_dataset(dataset_name): return dataset_class(dataset_name) +class Markers(object): + """Todo: Build in cacheing so it saves us reading the same file more than once""" + def __init__(self, name): + json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) + self.markers = json.load(json_data) + + def add_pvalues(p_values): + #for count, marker in enumerate(self.markers): + # marker['p_value'] = p_values[count] + + for marker, p_value in itertools.izip(self.markers, p_values): + marker['p_value'] = p_value + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. @@ -84,6 +101,7 @@ class DatasetGroup(object): self.f1list = None self.parlist = None self.allsamples = None + self.markers = Markers(self.name) #def read_genotype(self): @@ -91,9 +109,16 @@ class DatasetGroup(object): # # if not self.genotype: # Didn'd succeed, so we try method 2 # self.read_genotype_data() - + + #def read_genotype_json(self): + # '''Read genotype from json file''' + # + # json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json')) + # markers = json.load(json_data) + # + def read_genotype_file(self): - '''read genotype from .geno file instead of database''' + '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': # self.group = 'BXD' # diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 13ec4280..1d005df4 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -454,10 +454,11 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - - json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) - markers = json.load(json_data) - genotype_data = [marker['genotypes'] for marker in markers] + + #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) + #markers = json.load(json_data) + + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers] no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) @@ -466,7 +467,6 @@ class MarkerRegression(object): #for marker_object in genotype_data: # print("marker_object:", pf(marker_object)) - #prep_data.PrepData(self.vals, genotype_data) @@ -492,40 +492,60 @@ class MarkerRegression(object): refit=False) print("p_values is:", pf(len(p_values))) + + self.dataset.group.markers.add_pvalues(p_values) #calculate QTL for each trait - self.qtl_results = self.genotype.regression(strains = self.samples, - trait = self.vals) - self.lrs_array = self.genotype.permutation(strains = self.samples, - trait = self.vals, - nperm=self.num_perm) + #self.qtl_results = self.genotype.regression(strains = self.samples, + # trait = self.vals) + #self.lrs_array = self.genotype.permutation(strains = self.samples, + # trait = self.vals, + # nperm=self.num_perm) + + self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers] self.lrs_thresholds = Bunch( - suggestive = self.lrs_array[int(self.num_perm*0.37-1)], - significant = self.lrs_array[int(self.num_perm*0.95-1)], - highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] + suggestive = self.lrs_values[int(self.num_perm*0.37-1)], + significant = self.lrs_values[int(self.num_perm*0.95-1)], + highly_significant = self.lrs_values[int(self.num_perm*0.99-1)] ) + #self.lrs_thresholds = Bunch( + # suggestive = self.lrs_array[int(self.num_perm*0.37-1)], + # significant = self.lrs_array[int(self.num_perm*0.95-1)], + # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] + # ) + if self.display_all_lrs: - filtered_results = self.qtl_results + self.filtered_results = self.dataset.group.markers.markers else: - suggestive_results = [] + self.filtered_results = [] self.pure_qtl_results = [] - for result in self.qtl_results: - self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, - mb=result.locus.Mb, - chromosome=result.locus.chr), - lrs=result.lrs, - additive=result.additive)) - if result.lrs > self.lrs_thresholds.suggestive: - suggestive_results.append(result) - filtered_results = suggestive_results + for marker in self.dataset.group.markers.markers: + self.pure_qtl_results.append(marker) + if marker['lrs_value'] > self.lrs_thresholds.suggestive: + self.filtered_results.append(marker) + + #if self.display_all_lrs: + # filtered_results = self.qtl_results + #else: + # suggestive_results = [] + # self.pure_qtl_results = [] + # for result in self.qtl_results: + # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, + # mb=result.locus.Mb, + # chromosome=result.locus.chr), + # lrs=result.lrs, + # additive=result.additive)) + # if result.lrs > self.lrs_thresholds.suggestive: + # suggestive_results.append(result) + # filtered_results = suggestive_results # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used - if not filtered_results: + if not self.filtered_results: # We use the 10 results with the highest LRS values - filtered_results = sorted(self.qtl_results)[-10:] + self.filtered_results = sorted(self.qtl_results)[-10:] self.top_10 = True else: self.top_10 = False @@ -567,11 +587,9 @@ class MarkerRegression(object): #permutation = HT.TableLite() #permutation.append(HT.TR(HT.TD(img))) - for marker in filtered_results: - if marker.lrs > webqtlConfig.MAXLRS: - marker.lrs = webqtlConfig.MAXLRS - - self.filtered_results = filtered_results + for marker in self.filtered_results: + if marker['lrs_value'] > webqtlConfig.MAXLRS: + marker['lrs_value'] = webqtlConfig.MAXLRS #if fd.genotype.type == 'intercross': # ncol =len(headerList) diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index ec8c521c..8c74fe74 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -28,10 +28,11 @@ class Marker(object): class ConvertGenoFile(object): - def __init__(self, input_file, output_file): + def __init__(self, input_file, output_file, file_type): self.input_file = input_file self.output_file = output_file + self.file_type = file_type self.mb_exists = False self.markers = [] @@ -57,7 +58,10 @@ class ConvertGenoFile(object): self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: - self.process_csv() + if self.file_type == "geno": + self.process_csv() + elif self.file_type == "snps": + self.process_snps_file() #def process_row(self, row): @@ -66,6 +70,7 @@ class ConvertGenoFile(object): # if char # counter += 1 + def process_csv(self): for row_count, row in enumerate(self.process_rows()): #self.latest_row_pos = row_count @@ -146,6 +151,12 @@ class ConvertGenoFile(object): print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break + + def process_snps_file(cls, snps_file, new_directory): + output_file = os.path.join(new_directory, "mouse_families.json") + print("%s -> %s" % (snps_file, output_file)) + convertob = ConvertGenoFile(input_file, output_file) + if __name__=="__main__": -- cgit v1.2.3 From e416b7b4fcabff05d1665ae5dbb962cfb61e471d Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 13 Feb 2013 14:26:52 -0600 Subject: Fixed some bugs related to getting the marker regression page working with Nick's code --- wqflask/base/data_set.py | 9 ++++----- wqflask/wqflask/marker_regression/marker_regression.py | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 182e15e6..d4e97370 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -22,8 +22,10 @@ from __future__ import absolute_import, print_function, division import os +import math import json +import itertools from flask import Flask, g @@ -70,12 +72,9 @@ class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" def __init__(self, name): json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) - self.markers = json.load(json_data) + self.markers = json.load(json_data_fh) - def add_pvalues(p_values): - #for count, marker in enumerate(self.markers): - # marker['p_value'] = p_values[count] - + def add_pvalues(self, p_values): for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 1d005df4..c9451154 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -458,7 +458,7 @@ class MarkerRegression(object): #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) #markers = json.load(json_data) - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers] + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) @@ -502,7 +502,9 @@ class MarkerRegression(object): # trait = self.vals, # nperm=self.num_perm) - self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers] + self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] + print("self.lrs_values is:", pf(self.lrs_values)) + print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) self.lrs_thresholds = Bunch( suggestive = self.lrs_values[int(self.num_perm*0.37-1)], -- cgit v1.2.3 From e334da7dd55dd102c471a144ef0a63389830a256 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 14 Feb 2013 17:45:10 -0600 Subject: Have marker regression results displaying as a scatterplot (just the points) using d3.js --- wqflask/base/data_set.py | 2 + .../static/new/javascript/marker_regression.coffee | 260 +++++---------------- .../static/new/javascript/marker_regression.js | 254 ++++---------------- .../new/javascript/marker_regression_old.coffee | 212 +++++++++++++++++ wqflask/wqflask/templates/marker_regression.html | 9 +- 5 files changed, 310 insertions(+), 427 deletions(-) create mode 100644 wqflask/wqflask/static/new/javascript/marker_regression_old.coffee (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index d4e97370..0b3b5248 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -77,9 +77,11 @@ class Markers(object): def add_pvalues(self, p_values): for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value + marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.coffee b/wqflask/wqflask/static/new/javascript/marker_regression.coffee index 7eb62ec1..6a10d95e 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.coffee +++ b/wqflask/wqflask/static/new/javascript/marker_regression.coffee @@ -1,212 +1,56 @@ $ -> - sort_number = (a, b) -> - return a - b - - class Permutation_Histogram - constructor: -> - @process_data() - @display_graph() - - process_data: -> - # Put the data in a format needed for graphing - # The permutation count for a particular integer range (10-11 or 12-13 for example) - # will be on the y-axis; LRS values will be on the x-axis - lrs_array = js_data.lrs_array - bars = {} - for lrs in lrs_array - floored = Math.floor(lrs) - if floored not of bars - bars[floored] = 0 - bars[floored] += 1 - - # Now we need to take the unordered hash - # And order the keys - keys = [] - for key of bars - keys.push(key) - - keys.sort(sort_number) - - - # Now that we have the ordered keys above - # We can build an array of arrays that jqPlot will use - @bars_ordered = [] - for key in keys - @bars_ordered.push([parseInt(key), bars[key]]) + x_coords = [] + y_coords = [] - console.log("bars is:", bars) - console.log("keys are:", keys) - console.log("bars_ordered are:", @bars_ordered) - #return bars_ordered - - display_graph: -> - - $.jqplot('permutation_histogram', [@bars_ordered], - title: 'Permutation Histogram' - seriesDefaults: - renderer:$.jqplot.BarRenderer - rendererOptions: - barWidth: 15 - pointLabels: - show: true - axesDefaults: - labelRenderer: $.jqplot.CanvasAxisLabelRenderer - axes: - xaxis: - min: 0 - label: "LRS" - pad: 1.1 - yaxis: - min: 0 - label: "Frequency" - ) - - #process_qtl_results = -> - # qtl_results = js_data.qtl_results - - #display_manhattan_plot = -> - - - - #bars_ordered = process_lrs_array() - #display_permutation_histogram(bars_ordered) - - class Chromosome - constructor: (@name) -> - @max_mb = 0 - @plot_points = [] - - process_point: (mb, lrs) -> - if mb > @max_mb - @max_mb = mb - @plot_points.push([mb, lrs]) - - display_graph: (max_lrs) -> - div_name = 'manhattan_plot_' + @name - console.log("div_name:", div_name) - - #console.log("max_lrs is", max_lrs) - - - x_axis_max = Math.ceil(@max_mb/25) * 25 - x_axis_ticks = [] - x_tick = 0 - while (x_tick <= x_axis_max) - x_axis_ticks.push(x_tick) - x_tick += 25 - - - plot_options = - title: @name - seriesDefaults: - showLine: false - markerRenderer: $.jqplot.MarkerRenderer - markerOptions: - style: "filledCircle" - size: 3 - axesDefaults: - tickRenderer: $.jqplot.CanvasAxisTickRenderer - labelRenderer: $.jqplot.CanvasAxisLabelRenderer - axes: - xaxis: - min: 0 - max: x_axis_max - ticks: x_axis_ticks - tickOptions: - angle: 90 - showGridline: false - formatString: '%d' - label: "Megabases" - - - if @name == "1" - plot_options.axes.yaxis = - min: 0 - max: Math.floor(max_lrs + 0.1 * max_lrs) - tickInterval: 1 - label: "LRS" - tickOptions: - formatString: '%d' - showGridline: false - else - plot_options.axes.yaxis = - show: false - min: 0 - max: Math.floor(max_lrs + 0.1 * max_lrs) - tickInterval: 1 - tickOptions: - formatString: '%d' - showGridline: false - - $.jqplot(div_name, [@plot_points], plot_options) - - class Manhattan_Plot - constructor: -> - @max_lrs = 0 - - @chromosomes = {} - @build_chromosomes() - - @display_graphs() - - build_chromosomes: -> - for result in js_data.qtl_results - #if result.locus.chromosome == '1' - chromosome = result.locus.chromosome - if chromosome not of @chromosomes - @chromosomes[chromosome] = new Chromosome(chromosome) - mb = parseInt(result.locus.mb) - if result.lrs > @max_lrs - @max_lrs = result.lrs - @chromosomes[chromosome].process_point(mb, result.lrs) - - display_graphs: -> - ### Call display_graph for each chromosome ### - - # First get everything in the right order - numbered_keys = [] - extra_keys = [] - for key of @chromosomes - if isNaN(key) - extra_keys.push(key) - else - numbered_keys.push(key) - - numbered_keys.sort(sort_number) - extra_keys.sort() - keys = numbered_keys.concat(extra_keys) - console.log("keys are:", keys) - - for key in keys - this_class = "manhattan_plot_segment" - if key != "1" - this_class += " no_y_axis" - html = """
""" - console.log("html is:", html) - $("#manhattan_plots").append(html) - @chromosomes[key].display_graph(@max_lrs) - - $('.jqplot-yaxis').hide() - $('#manhattan_plot_1').find('.jqplot-yaxis').show() - - #$(".jqplot-yaxis").hide() - #$(".jqplot-yaxis-tick").hide() - - - #process_data: -> - # qtl_results = js_data.qtl_results - # #console.log("qtl_results: ", qtl_results) - # @plot_points = [] - # @max_mb = 0 - # for result in qtl_results - # if result.locus.chromosome == '1' - # mb = parseInt(result.locus.mb) - # if mb > @max_mb - # @max_mb = mb - # @plot_points.push([mb, result.lrs]) + largest_chr = 0 + for result in js_data.qtl_results + chr = parseInt(result.chr) + console.log("foo:", chr, typeof(chr)) + if _.isNaN(chr) + console.log("Got NaN") + else + if chr > largest_chr + largest_chr = chr - - - new Permutation_Histogram - new Manhattan_Plot \ No newline at end of file + console.log("largest_chr is:", largest_chr) + + for result in js_data.qtl_results + chr = parseInt(result.chr) + if _.isNaN(chr) + if result.chr == "X" + chr = largest_chr + 1 + else if result.chr == "Y" + chr = largest_chr + 2 + + x_coords.push((chr * 200) + parseFloat(result.Mb)) + y_coords.push(result.lrs_value) + #plot_coordinates.push([x_coord, y_coord]) + + x_max = d3.max(x_coords) + y_max = d3.max(y_coords) + + plot_coordinates = _.zip(x_coords, y_coords) + + console.log(plot_coordinates) + + svg = d3.select("#manhattan_plots") + .append("svg") + .attr("width", 1000) + .attr("height", 800) + #.attr("transform", "translate(0," + y_max + ")") + #.attr("transform", "scale(1,-1)") + + + svg.selectAll("circle") + .data(plot_coordinates) + .enter() + .append("circle") + .attr("cx", (d) => + return (1000 * d[0]/x_max) + #return ((900 * (d[0]/x_max)) + 50) + ) + .attr("cy", (d) => + return 800 - (600 * d[1]/y_max) + ) + .attr("r", 3) \ No newline at end of file diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.js b/wqflask/wqflask/static/new/javascript/marker_regression.js index 19a7d051..d231ba5b 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.js +++ b/wqflask/wqflask/static/new/javascript/marker_regression.js @@ -2,225 +2,49 @@ (function() { $(function() { - var Chromosome, Manhattan_Plot, Permutation_Histogram, sort_number; - sort_number = function(a, b) { - return a - b; - }; - Permutation_Histogram = (function() { - - function Permutation_Histogram() { - this.process_data(); - this.display_graph(); - } - - Permutation_Histogram.prototype.process_data = function() { - var bars, floored, key, keys, lrs, lrs_array, _i, _j, _len, _len1; - lrs_array = js_data.lrs_array; - bars = {}; - for (_i = 0, _len = lrs_array.length; _i < _len; _i++) { - lrs = lrs_array[_i]; - floored = Math.floor(lrs); - if (!(floored in bars)) { - bars[floored] = 0; - } - bars[floored] += 1; + var chr, largest_chr, plot_coordinates, result, svg, x_coords, x_max, y_coords, y_max, _i, _j, _len, _len1, _ref, _ref1, + _this = this; + x_coords = []; + y_coords = []; + largest_chr = 0; + _ref = js_data.qtl_results; + for (_i = 0, _len = _ref.length; _i < _len; _i++) { + result = _ref[_i]; + chr = parseInt(result.chr); + console.log("foo:", chr, typeof chr); + if (_.isNaN(chr)) { + console.log("Got NaN"); + } else { + if (chr > largest_chr) { + largest_chr = chr; } - keys = []; - for (key in bars) { - keys.push(key); - } - keys.sort(sort_number); - this.bars_ordered = []; - for (_j = 0, _len1 = keys.length; _j < _len1; _j++) { - key = keys[_j]; - this.bars_ordered.push([parseInt(key), bars[key]]); - } - console.log("bars is:", bars); - console.log("keys are:", keys); - return console.log("bars_ordered are:", this.bars_ordered); - }; - - Permutation_Histogram.prototype.display_graph = function() { - return $.jqplot('permutation_histogram', [this.bars_ordered], { - title: 'Permutation Histogram', - seriesDefaults: { - renderer: $.jqplot.BarRenderer, - rendererOptions: { - barWidth: 15 - }, - pointLabels: { - show: true - } - }, - axesDefaults: { - labelRenderer: $.jqplot.CanvasAxisLabelRenderer - }, - axes: { - xaxis: { - min: 0, - label: "LRS", - pad: 1.1 - }, - yaxis: { - min: 0, - label: "Frequency" - } - } - }); - }; - - return Permutation_Histogram; - - })(); - Chromosome = (function() { - - function Chromosome(name) { - this.name = name; - this.max_mb = 0; - this.plot_points = []; } - - Chromosome.prototype.process_point = function(mb, lrs) { - if (mb > this.max_mb) { - this.max_mb = mb; - } - return this.plot_points.push([mb, lrs]); - }; - - Chromosome.prototype.display_graph = function(max_lrs) { - var div_name, plot_options, x_axis_max, x_axis_ticks, x_tick; - div_name = 'manhattan_plot_' + this.name; - console.log("div_name:", div_name); - x_axis_max = Math.ceil(this.max_mb / 25) * 25; - x_axis_ticks = []; - x_tick = 0; - while (x_tick <= x_axis_max) { - x_axis_ticks.push(x_tick); - x_tick += 25; + } + console.log("largest_chr is:", largest_chr); + _ref1 = js_data.qtl_results; + for (_j = 0, _len1 = _ref1.length; _j < _len1; _j++) { + result = _ref1[_j]; + chr = parseInt(result.chr); + if (_.isNaN(chr)) { + if (result.chr === "X") { + chr = largest_chr + 1; + } else if (result.chr === "Y") { + chr = largest_chr + 2; } - plot_options = { - title: this.name, - seriesDefaults: { - showLine: false, - markerRenderer: $.jqplot.MarkerRenderer, - markerOptions: { - style: "filledCircle", - size: 3 - } - }, - axesDefaults: { - tickRenderer: $.jqplot.CanvasAxisTickRenderer, - labelRenderer: $.jqplot.CanvasAxisLabelRenderer - }, - axes: { - xaxis: { - min: 0, - max: x_axis_max, - ticks: x_axis_ticks, - tickOptions: { - angle: 90, - showGridline: false, - formatString: '%d' - }, - label: "Megabases" - } - } - }; - if (this.name === "1") { - plot_options.axes.yaxis = { - min: 0, - max: Math.floor(max_lrs + 0.1 * max_lrs), - tickInterval: 1, - label: "LRS", - tickOptions: { - formatString: '%d', - showGridline: false - } - }; - } else { - plot_options.axes.yaxis = { - show: false, - min: 0, - max: Math.floor(max_lrs + 0.1 * max_lrs), - tickInterval: 1, - tickOptions: { - formatString: '%d', - showGridline: false - } - }; - } - return $.jqplot(div_name, [this.plot_points], plot_options); - }; - - return Chromosome; - - })(); - Manhattan_Plot = (function() { - - function Manhattan_Plot() { - this.max_lrs = 0; - this.chromosomes = {}; - this.build_chromosomes(); - this.display_graphs(); } - - Manhattan_Plot.prototype.build_chromosomes = function() { - var chromosome, mb, result, _i, _len, _ref, _results; - _ref = js_data.qtl_results; - _results = []; - for (_i = 0, _len = _ref.length; _i < _len; _i++) { - result = _ref[_i]; - chromosome = result.locus.chromosome; - if (!(chromosome in this.chromosomes)) { - this.chromosomes[chromosome] = new Chromosome(chromosome); - } - mb = parseInt(result.locus.mb); - if (result.lrs > this.max_lrs) { - this.max_lrs = result.lrs; - } - _results.push(this.chromosomes[chromosome].process_point(mb, result.lrs)); - } - return _results; - }; - - Manhattan_Plot.prototype.display_graphs = function() { - /* Call display_graph for each chromosome - */ - - var extra_keys, html, key, keys, numbered_keys, this_class, _i, _len; - numbered_keys = []; - extra_keys = []; - for (key in this.chromosomes) { - if (isNaN(key)) { - extra_keys.push(key); - } else { - numbered_keys.push(key); - } - } - numbered_keys.sort(sort_number); - extra_keys.sort(); - keys = numbered_keys.concat(extra_keys); - console.log("keys are:", keys); - for (_i = 0, _len = keys.length; _i < _len; _i++) { - key = keys[_i]; - this_class = "manhattan_plot_segment"; - if (key !== "1") { - this_class += " no_y_axis"; - } - html = "
"; - console.log("html is:", html); - $("#manhattan_plots").append(html); - this.chromosomes[key].display_graph(this.max_lrs); - } - $('.jqplot-yaxis').hide(); - return $('#manhattan_plot_1').find('.jqplot-yaxis').show(); - }; - - return Manhattan_Plot; - - })(); - new Permutation_Histogram; - return new Manhattan_Plot; + x_coords.push((chr * 200) + parseFloat(result.Mb)); + y_coords.push(result.lrs_value); + } + x_max = d3.max(x_coords); + y_max = d3.max(y_coords); + plot_coordinates = _.zip(x_coords, y_coords); + console.log(plot_coordinates); + svg = d3.select("#manhattan_plots").append("svg").attr("width", 1000).attr("height", 800); + return svg.selectAll("circle").data(plot_coordinates).enter().append("circle").attr("cx", function(d) { + return 1000 * d[0] / x_max; + }).attr("cy", function(d) { + return 800 - (600 * d[1] / y_max); + }).attr("r", 3); }); }).call(this); diff --git a/wqflask/wqflask/static/new/javascript/marker_regression_old.coffee b/wqflask/wqflask/static/new/javascript/marker_regression_old.coffee new file mode 100644 index 00000000..7eb62ec1 --- /dev/null +++ b/wqflask/wqflask/static/new/javascript/marker_regression_old.coffee @@ -0,0 +1,212 @@ +$ -> + sort_number = (a, b) -> + return a - b + + + class Permutation_Histogram + constructor: -> + @process_data() + @display_graph() + + process_data: -> + # Put the data in a format needed for graphing + # The permutation count for a particular integer range (10-11 or 12-13 for example) + # will be on the y-axis; LRS values will be on the x-axis + lrs_array = js_data.lrs_array + bars = {} + for lrs in lrs_array + floored = Math.floor(lrs) + if floored not of bars + bars[floored] = 0 + bars[floored] += 1 + + # Now we need to take the unordered hash + # And order the keys + keys = [] + for key of bars + keys.push(key) + + keys.sort(sort_number) + + + # Now that we have the ordered keys above + # We can build an array of arrays that jqPlot will use + @bars_ordered = [] + for key in keys + @bars_ordered.push([parseInt(key), bars[key]]) + + console.log("bars is:", bars) + console.log("keys are:", keys) + console.log("bars_ordered are:", @bars_ordered) + #return bars_ordered + + display_graph: -> + + $.jqplot('permutation_histogram', [@bars_ordered], + title: 'Permutation Histogram' + seriesDefaults: + renderer:$.jqplot.BarRenderer + rendererOptions: + barWidth: 15 + pointLabels: + show: true + axesDefaults: + labelRenderer: $.jqplot.CanvasAxisLabelRenderer + axes: + xaxis: + min: 0 + label: "LRS" + pad: 1.1 + yaxis: + min: 0 + label: "Frequency" + ) + + #process_qtl_results = -> + # qtl_results = js_data.qtl_results + + #display_manhattan_plot = -> + + + + #bars_ordered = process_lrs_array() + #display_permutation_histogram(bars_ordered) + + class Chromosome + constructor: (@name) -> + @max_mb = 0 + @plot_points = [] + + process_point: (mb, lrs) -> + if mb > @max_mb + @max_mb = mb + @plot_points.push([mb, lrs]) + + display_graph: (max_lrs) -> + div_name = 'manhattan_plot_' + @name + console.log("div_name:", div_name) + + #console.log("max_lrs is", max_lrs) + + + x_axis_max = Math.ceil(@max_mb/25) * 25 + x_axis_ticks = [] + x_tick = 0 + while (x_tick <= x_axis_max) + x_axis_ticks.push(x_tick) + x_tick += 25 + + + plot_options = + title: @name + seriesDefaults: + showLine: false + markerRenderer: $.jqplot.MarkerRenderer + markerOptions: + style: "filledCircle" + size: 3 + axesDefaults: + tickRenderer: $.jqplot.CanvasAxisTickRenderer + labelRenderer: $.jqplot.CanvasAxisLabelRenderer + axes: + xaxis: + min: 0 + max: x_axis_max + ticks: x_axis_ticks + tickOptions: + angle: 90 + showGridline: false + formatString: '%d' + label: "Megabases" + + + if @name == "1" + plot_options.axes.yaxis = + min: 0 + max: Math.floor(max_lrs + 0.1 * max_lrs) + tickInterval: 1 + label: "LRS" + tickOptions: + formatString: '%d' + showGridline: false + else + plot_options.axes.yaxis = + show: false + min: 0 + max: Math.floor(max_lrs + 0.1 * max_lrs) + tickInterval: 1 + tickOptions: + formatString: '%d' + showGridline: false + + $.jqplot(div_name, [@plot_points], plot_options) + + class Manhattan_Plot + constructor: -> + @max_lrs = 0 + + @chromosomes = {} + @build_chromosomes() + + @display_graphs() + + build_chromosomes: -> + for result in js_data.qtl_results + #if result.locus.chromosome == '1' + chromosome = result.locus.chromosome + if chromosome not of @chromosomes + @chromosomes[chromosome] = new Chromosome(chromosome) + mb = parseInt(result.locus.mb) + if result.lrs > @max_lrs + @max_lrs = result.lrs + @chromosomes[chromosome].process_point(mb, result.lrs) + + display_graphs: -> + ### Call display_graph for each chromosome ### + + # First get everything in the right order + numbered_keys = [] + extra_keys = [] + for key of @chromosomes + if isNaN(key) + extra_keys.push(key) + else + numbered_keys.push(key) + + numbered_keys.sort(sort_number) + extra_keys.sort() + keys = numbered_keys.concat(extra_keys) + console.log("keys are:", keys) + + for key in keys + this_class = "manhattan_plot_segment" + if key != "1" + this_class += " no_y_axis" + html = """
""" + console.log("html is:", html) + $("#manhattan_plots").append(html) + @chromosomes[key].display_graph(@max_lrs) + + $('.jqplot-yaxis').hide() + $('#manhattan_plot_1').find('.jqplot-yaxis').show() + + #$(".jqplot-yaxis").hide() + #$(".jqplot-yaxis-tick").hide() + + + #process_data: -> + # qtl_results = js_data.qtl_results + # #console.log("qtl_results: ", qtl_results) + # @plot_points = [] + # @max_mb = 0 + # for result in qtl_results + # if result.locus.chromosome == '1' + # mb = parseInt(result.locus.mb) + # if mb > @max_mb + # @max_mb = mb + # @plot_points.push([mb, result.lrs]) + + + + new Permutation_Histogram + new Manhattan_Plot \ No newline at end of file diff --git a/wqflask/wqflask/templates/marker_regression.html b/wqflask/wqflask/templates/marker_regression.html index 6b0a4813..0c537d19 100644 --- a/wqflask/wqflask/templates/marker_regression.html +++ b/wqflask/wqflask/templates/marker_regression.html @@ -36,7 +36,7 @@ {% for marker in filtered_results %} {{loop.index}} - {{marker.lrs_value}} + {{marker.lod_score}} {{marker.chr}} {{marker.Mb}} {{marker.name}} @@ -59,6 +59,7 @@ + @@ -66,13 +67,13 @@ - + + - {% endblock %} \ No newline at end of file -- cgit v1.2.3 From 5bfe4e6c590bf0ade2312536f0f8e3ce055e2c75 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 22 Feb 2013 22:18:38 +0000 Subject: Added some to notes --- misc/notes.txt | 12 ++++++++++++ wqflask/base/webqtlConfigLocal.py | 14 +++++++------- wqflask/cfg/zach_settings.py | 2 +- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'wqflask/base') diff --git a/misc/notes.txt b/misc/notes.txt index fd08d9a2..ed0390e1 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -4,11 +4,15 @@ git clone http://github.com/zsloan/genenetwork.git gene Pull from branch in git repository: git pull origin flask(or whatever the branch is) +Add all files in current directory branch to be staged to be committed +git add -A + **** apt-get is an Ubuntu system-wide package manager, while pip is a Python-only system for python packages **** Upgrade system packages for security updates, etc: +apt-get update apt-get upgrade **"apt-cache search" searches for text in the description for various packages, while @@ -70,6 +74,14 @@ python runserver.py =========================================== +Redis things + +Restart redis server: +sudo /etc/init.d/redis_6379 stop +sudo /etc/init.d/redis_6379 start + +=========================================== + Start screen session: byobu -RD (to start) control-a then :multiuser on diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py index 8e3e0bbe..abaeff93 100755 --- a/wqflask/base/webqtlConfigLocal.py +++ b/wqflask/base/webqtlConfigLocal.py @@ -2,18 +2,18 @@ # Environment Variables - private ######################################### -MYSQL_SERVER = 'localhost' -DB_NAME = 'db_webqtl_zas1024' +MYSQL_SERVER = 'gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com' +DB_NAME = 'db_webqtl' DB_USER = 'webqtl' -DB_PASSWD = 'webqtl' +DB_PASSWD = 'f2ZypIflRM' -MYSQL_UPDSERVER = 'localhost' -DB_UPDNAME = 'db_webqtl_zas1024' +MYSQL_UPDSERVER = 'gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com' +DB_UPDNAME = 'db_webqtl' DB_UPDUSER = 'webqtl' -DB_UPDPASSWD = 'webqtl' +DB_UPDPASSWD = 'f2ZypIflRM' GNROOT = '/home/zas1024/gene/' -ROOT_URL = 'http://alexandria.uthsc.edu:91/' +ROOT_URL = 'http://50.16.251.170' PythonPath = '/usr/bin/python' PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' diff --git a/wqflask/cfg/zach_settings.py b/wqflask/cfg/zach_settings.py index 8d3bf4ab..1970bd9c 100644 --- a/wqflask/cfg/zach_settings.py +++ b/wqflask/cfg/zach_settings.py @@ -2,4 +2,4 @@ LOGFILE = """/tmp/flask_gn_log""" TRAP_BAD_REQUEST_ERRORS = True -DB_URI = """mysql://webqtl:webqtl@localhost/db_webqtl_zas1024""" \ No newline at end of file +DB_URI = """mysql://webqtl:f2ZypIflRM@gn.cazhbciu2y1i.us-east-1.rds.amazonaws.com/db_webqtl""" -- cgit v1.2.3 From ed1dd7777b6dc49ed3496668bfd3b3df0d0a0612 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Sat, 2 Mar 2013 00:52:30 +0000 Subject: Created generate_probesetfreeze_file to create the "probesetfreeze data matrix" file corresponding with the muscle dataset Evan was having trouble with --- wqflask/base/data_set.py | 8 +- .../maintenance/generate_probesetfreeze_file.py | 123 ++++ .../new/javascript/dataset_menu_structure.json | 664 +++++++++++---------- 3 files changed, 476 insertions(+), 319 deletions(-) create mode 100644 wqflask/maintenance/generate_probesetfreeze_file.py (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 0b3b5248..c6d67e68 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -29,8 +29,6 @@ import itertools from flask import Flask, g -from htmlgen import HTMLgen2 as HT - import reaper from base import webqtlConfig @@ -252,11 +250,7 @@ class DataSet(object): #self.cursor.execute(query) #self.id, self.name, self.fullname, self.shortname = self.cursor.fetchone() - - - #def genHTML(self, Class='c0dd'): - # return HT.Href(text = HT.Span('%s Database' % self.fullname, Class= "fwb " + Class), - # url= webqtlConfig.INFOPAGEHREF % self.name,target="_blank") + class PhenotypeDataSet(DataSet): DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py new file mode 100644 index 00000000..95515cea --- /dev/null +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -0,0 +1,123 @@ +#!/usr/bin/python + +from __future__ import absolute_import, print_function, division + +import sys + +sys.path.insert(0, "..") + +import os +import collections +import csv + +import MySQLdb + +from base import webqtlConfig + +from pprint import pformat as pf + + +def get_cursor(): + con = MySQLdb.Connect(db=webqtlConfig.DB_UPDNAME, + host=webqtlConfig.MYSQL_UPDSERVER, + user=webqtlConfig.DB_UPDUSER, + passwd=webqtlConfig.DB_UPDPASSWD) + cursor = con.cursor() + return cursor + +def show_progress(process, counter): + if counter % 1000 == 0: + print("{}: {}".format(process, counter)) + +def get_strains(cursor): + cursor.execute("""select Strain.Name + from Strain, StrainXRef, InbredSet + where Strain.Id = StrainXRef.StrainId and + StrainXRef.InbredSetId = InbredSet.Id + and InbredSet.Name=%s; + """, "BXD") + + strains = [strain[0] for strain in cursor.fetchall()] + print("strains:", pf(strains)) + for strain in strains: + print(" -", strain) + + return strains + +def get_probeset_vals(cursor, dataset_name): + cursor.execute(""" select ProbeSet.Id, ProbeSet.Name + from ProbeSetXRef, + ProbeSetFreeze, + ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and + ProbeSetFreeze.Name = %s and + ProbeSetXRef.ProbeSetId = ProbeSet.Id; + """, dataset_name) + + probesets = cursor.fetchall() + + print("Fetched probesets") + + probeset_vals = collections.OrderedDict() + + for counter, probeset in enumerate(probesets): + cursor.execute(""" select Strain.Name, ProbeSetData.value + from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain + where ProbeSetData.Id = ProbeSetXRef.DataId + and ProbeSetData.StrainId = Strain.Id + and ProbeSetXRef.ProbeSetId = %s + and ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId + and ProbeSetFreeze.Name = %s; + """, (probeset[0], dataset_name)) + val_dic = collections.OrderedDict() + vals = cursor.fetchall() + for val in vals: + val_dic[val[0]] = val[1] + + probeset_vals[probeset[1]] = val_dic + show_progress("Querying DB", counter) + + return probeset_vals + +def trim_strains(strains, probeset_vals): + trimmed_strains = [] + #print("probeset_vals is:", pf(probeset_vals)) + first_probeset = list(probeset_vals.itervalues())[0] + for strain in strains: + print("\n**** strain is:", pf(strain)) + print("\n**** first_probeset is:", pf(first_probeset)) + if strain in first_probeset: + trimmed_strains.append(strain) + print("trimmed_strains:", pf(trimmed_strains)) + return trimmed_strains + +def write_data_matrix_file(strains, probeset_vals, filename): + with open(filename, "wb") as fh: + csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) + #print("strains is:", pf(strains)) + csv_writer.writerow(['ID'] + strains) + for counter, probeset in enumerate(probeset_vals): + row_data = [probeset] + for strain in strains: + #print("probeset is: ", pf(probeset_vals[probeset])) + row_data.append(probeset_vals[probeset][strain]) + #print("row_data is: ", pf(row_data)) + csv_writer.writerow(row_data) + show_progress("Writing", counter) + +def main(): + filename = os.path.expanduser("~/gene/wqflask/maintenance/" + + "ProbeSetFreezeId_379_FullName_EPFL_LISP_BXD_CD_Muscle_Affy_Mouse_Gene_1.0_ST_" + + "(Dec11)_RMA_**.txt") + dataset_name = "EPFLMouseMuscleCDRMA1211" + + cursor = get_cursor() + strains = get_strains(cursor) + print("Getting probset_vals") + probeset_vals = get_probeset_vals(cursor, dataset_name) + print("Finished getting probeset_vals") + trimmed_strains = trim_strains(strains, probeset_vals) + write_data_matrix_file(trimmed_strains, probeset_vals, filename) + +if __name__ == '__main__': + main() diff --git a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json index 898ffa02..88bdcb11 100644 --- a/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json +++ b/wqflask/wqflask/static/new/javascript/dataset_menu_structure.json @@ -63,14 +63,14 @@ ] ], "Leaf": [ - [ - "B1LI0809R", - "Barley1 Leaf INOC TTKS (Aug09) RMA" - ], [ "B1LI0809M5", "Barley1 Leaf INOC TTKS (Aug09) MAS5" ], + [ + "B1LI0809R", + "Barley1 Leaf INOC TTKS (Aug09) RMA" + ], [ "B1MI0809M5", "Barley1 Leaf MOCK TTKS (Aug09) MAS5" @@ -113,13 +113,13 @@ "B30_K_1206_M", "Barley1 Leaf MAS 5.0 SCRI (Dec06)" ], - [ - "B30_K_1206_Rn", - "Barley1 Leaf gcRMAn SCRI (Dec06)" - ], [ "B30_K_1206_R", "Barley1 Leaf gcRMA SCRI (Dec06)" + ], + [ + "B30_K_1206_Rn", + "Barley1 Leaf gcRMAn SCRI (Dec06)" ] ], "Phenotypes": [ @@ -211,14 +211,14 @@ }, "AD-cases-controls-Myers": { "Brain": [ - [ - "GSE15222_F_A_RI_0409", - "GSE15222 Human Brain Alzheimer Myers (Apr09) RankInv" - ], [ "GSE15222_F_N_RI_0409", "GSE15222 Human Brain Normal Myers (Apr09) RankInv" ], + [ + "GSE15222_F_A_RI_0409", + "GSE15222 Human Brain Alzheimer Myers (Apr09) RankInv" + ], [ "GSE15222_F_RI_0409", "GSE15222 Human Brain Myers (Apr09) RankInv" @@ -381,43 +381,19 @@ "Amygdala": [ [ "KIN_YSM_AMY_0711", - "KIN/YSM Human AMY Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Caudal Ganglionic Eminence": [ - [ - "KIN_YSM_CGE_0711", - "KIN/YSM Human CGE Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "GN330 Human Amygdala Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Cerebellar Cortex": [ [ "KIN_YSM_CBC_0711", - "KIN/YSM Human CBC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Diencephalon": [ - [ - "KIN_YSM_DIE_0711", - "KIN/YSM Human DIE Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Dorsal Thalamus": [ - [ - "KIN_YSM_DTH_0711", - "KIN/YSM Human DTH Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Cerebellar Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Dorsolateral Prefrontal Cortex": [ [ "KIN_YSM_DFC_0711", - "KIN/YSM Human DFC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Frontal Cerebral Wall": [ - [ - "KIN_YSM_FC_0711", - "KIN/YSM Human FC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Dorsolateral Prefrontal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Genotypes": [ @@ -429,55 +405,31 @@ "Hippocampus": [ [ "KIN_YSM_HIP_0711", - "KIN/YSM Human HIP Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Hippocampus Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Inferior Temporal Cortex": [ [ "KIN_YSM_ITC_0711", - "KIN/YSM Human ITC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Lateral Ganglionic Eminence": [ - [ - "KIN_YSM_LGE_0711", - "KIN/YSM Human LGE Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Medial Ganglionic Eminence": [ - [ - "KIN_YSM_MGE_0711", - "KIN/YSM Human MGE Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Inferior Temporal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Medial Prefrontal Cortex": [ [ "KIN_YSM_MFC_0711", - "KIN/YSM Human MFC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Medial Prefrontal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Mediodorsal Nucleus of Thalamus": [ [ "KIN_YSM_MD_0711", - "KIN/YSM Human MD Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Occipital Cerebral Wall": [ - [ - "KIN_YSM_OC_0711", - "KIN/YSM Human OC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Mediodorsal Nucleus of Thalamus Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Orbital Prefrontal Cortex": [ [ "KIN_YSM_OFC_0711", - "KIN/YSM Human OFC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Parietal Cerebral Wall": [ - [ - "KIN_YSM_PC_0711", - "KIN/YSM Human PC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Orbital Prefrontal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Phenotypes": [ @@ -489,67 +441,49 @@ "Posterior Inferior Parietal Cortex": [ [ "KIN_YSM_IPC_0711", - "KIN/YSM Human IPC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Posterior Inferior Parietal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Posterior Superior Temporal Cortex": [ [ "KIN_YSM_STC_0711", - "KIN/YSM Human STC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Posterior Superior Temporal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Primary Auditory (A1) Cortex": [ [ "KIN_YSM_A1C_0711", - "KIN/YSM Human A1C Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Primary Auditory (A1) Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Primary Motor (M1) Cortex": [ [ "KIN_YSM_M1C_0711", - "KIN/YSM Human M1C Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Primary Motor (M1) Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Primary Somatosensory (S1) Cortex": [ [ "KIN_YSM_S1C_0711", - "KIN/YSM Human S1C Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Primary Somatosensory (S1) Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Primary Visual Cortex": [ [ "KIN_YSM_V1C_0711", - "KIN/YSM Human V1C Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Primary Visual Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Striatum": [ [ "KIN_YSM_STR_0711", - "KIN/YSM Human STR Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Temporal Cerebral Wall": [ - [ - "KIN_YSM_TC_0711", - "KIN/YSM Human TC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Upper (Rostral) Rhombic Lip": [ - [ - "KIN_YSM_URL_0711", - "KIN/YSM Human URL Affy Hu-Exon 1.0 ST (Jul11) Quantile **" - ] - ], - "Ventral Forebrain": [ - [ - "KIN_YSM_VF_0711", - "KIN/YSM Human VF Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Striatum Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ], "Ventrolateral Prefrontal Cortex": [ [ "KIN_YSM_VFC_0711", - "KIN/YSM Human VFC Affy Hu-Exon 1.0 ST (Jul11) Quantile **" + "Human Ventrolateral Prefrontal Cortex Affy Hu-Exon 1.0 ST (Jul11) Quantile" ] ] } @@ -558,12 +492,8 @@ "Macaca-fasicularis": { "Amygdala": [ [ - "INIA_MacFas_AMGc_RMA_0110", - "INIA Macaca fasicularis Amygdala control (Jan10) RMA **" - ], - [ - "INIA_MacFas_AMGe_RMA_0110", - "INIA Macaca fasicularis Amygdala ethanol (Jan10) RMA **" + "INIA_MacFas_AMG_RMA_0110", + "INIA Macaca fasicularis Amygdala (Jan10) RMA **" ] ], "Brain": [ @@ -581,21 +511,13 @@ "Hippocampus": [ [ "INIA_MacFas_Hc_RMA_0110", - "INIA Macaca fasicularis Hippocampus control (Jan10) RMA **" - ], - [ - "INIA_MacFas_He_RMA_0110", - "INIA Macaca fasicularis Hippocampus ethanol (Jan10) RMA **" + "INIA Macaca fasicularis Hippocampus (Jan10) RMA **" ] ], "Nucleus Accumbens": [ [ "INIA_MacFas_Ac_RMA_0110", - "INIA Macaca fasicularis Nucleus Accumbens control (Jan10) RMA **" - ], - [ - "INIA_MacFas_Ae_RMA_0110", - "INIA Macaca fasicularis Nucleus Accumbens ethanol (Jan10) RMA **" + "INIA Macaca fasicularis Nucleus Accumbens (Jan10) RMA **" ] ], "Phenotypes": [ @@ -607,11 +529,7 @@ "Prefrontal Cortex": [ [ "INIA_MacFas_Pf_RMA_0110", - "INIA Macaca fasicularis Prefrontal Cortex control (Jan10) RMA **" - ], - [ - "INIA_MacFas_PfE_RMA_0110", - "INIA Macaca fasicularis Prefrontal Cortex ethanol (Jan10) RMA **" + "INIA Macaca fasicularis Prefrontal Cortex (Jan10) RMA **" ] ] } @@ -629,13 +547,13 @@ "NCI_Agil_Mam_Tum_RMA_0409", "NCI Mammary LMT miRNA v2 (Apr09) RMA" ], - [ - "MA_M_0704_R", - "NCI Mammary mRNA M430 (July04) RMA" - ], [ "MA_M_0704_M", "NCI Mammary mRNA M430 (July04) MAS5" + ], + [ + "MA_M_0704_R", + "NCI Mammary mRNA M430 (July04) RMA" ] ], "Phenotypes": [ @@ -658,6 +576,12 @@ "AXBXA Genotypes" ] ], + "Liver": [ + [ + "GSE16780AB_UCLA_ML0911", + "GSE16780 UCLA Mouse AXB/BXA Liver Affy HT M430A (Sep11) RMA" + ] + ], "Phenotypes": [ [ "AXBXAPublish", @@ -673,13 +597,13 @@ ] ], "Liver": [ - [ - "LVF2_M_0704_R", - "(B6 x BTBR)F2-ob/ob Liver mRNA M430 (Jul04) RMA" - ], [ "LVF2_M_0704_M", "(B6 x BTBR)F2-ob/ob Liver mRNA M430 (Jul04) MAS5" + ], + [ + "LVF2_M_0704_R", + "(B6 x BTBR)F2-ob/ob Liver mRNA M430 (Jul04) RMA" ] ], "Phenotypes": [ @@ -707,13 +631,13 @@ "BRF2_M_0304_P", "OHSU/VA B6D2F2 Brain mRNA M430A (Mar04) PDNN" ], - [ - "BRF2_M_0304_R", - "OHSU/VA B6D2F2 Brain mRNA M430A (Mar04) RMA" - ], [ "BRF2_M_0304_M", "OHSU/VA B6D2F2 Brain mRNA M430A (Mar04) MAS5" + ], + [ + "BRF2_M_0304_R", + "OHSU/VA B6D2F2 Brain mRNA M430A (Mar04) RMA" ] ], "Genotypes": [ @@ -729,6 +653,46 @@ ] ] }, + "B6D2F2-PSU": { + "Genotypes": [ + [ + "B6D2F2-PSUGeno", + "B6D2F2-PSU Genotypes" + ] + ], + "Muscle": [ + [ + "PSU-B6D2F2_0812", + "PSU B6D2F2 Muscle Affy Mouse Genome 430 2.0 (Aug12) RMA **" + ] + ], + "Phenotypes": [ + [ + "B6D2F2-PSUPublish", + "B6D2F2-PSU Published Phenotypes" + ] + ] + }, + "B6D2RI": { + "Genotypes": [ + [ + "B6D2RIGeno", + "B6D2RI Genotypes" + ] + ], + "Hippocampus": [ + [ + "UTHSC_B6D2RI_H_0912", + "UTHSC B6D2RI Aged Hippocampus Affy Mouse Gene 1.0 ST (Sep12) RMA **" + ] + ], + "Phenotypes": [ + [ + "B6D2RIPublish", + "B6D2RI Published Phenotypes" + ] + ] + }, "BDF2-1999": { "Genotypes": [ [ @@ -763,10 +727,6 @@ ] ], "Striatum": [ - [ - "SA_M2_0905_R", - "OHSU/VA B6D2F2 Striatum M430v2 (Sep05) RMA" - ], [ "SA_M2_0905_M", "OHSU/VA B6D2F2 Striatum M430v2 (Sep05) MAS5" @@ -774,33 +734,37 @@ [ "SA_M2_0905_P", "OHSU/VA B6D2F2 Striatum M430v2 (Sep05) PDNN" + ], + [ + "SA_M2_0905_R", + "OHSU/VA B6D2F2 Striatum M430v2 (Sep05) RMA" ] ] }, "BHF2": { "Adipose": [ - [ - "UCLA_BHF2_ADIPOSE_MALE", - "UCLA BHF2 Adipose Male mlratio" - ], [ "UCLA_BHF2_ADIPOSE_FEMALE", "UCLA BHF2 Adipose Female mlratio" ], + [ + "UCLA_BHF2_ADIPOSE_MALE", + "UCLA BHF2 Adipose Male mlratio" + ], [ "UCLA_BHF2_ADIPOSE_0605", "UCLA BHF2 Adipose (June05) mlratio" ] ], "Brain": [ - [ - "UCLA_BHF2_BRAIN_MALE", - "UCLA BHF2 Brain Male mlratio" - ], [ "UCLA_BHF2_BRAIN_FEMALE", "UCLA BHF2 Brain Female mlratio" ], + [ + "UCLA_BHF2_BRAIN_MALE", + "UCLA BHF2 Brain Male mlratio" + ], [ "UCLA_BHF2_BRAIN_0605", "UCLA BHF2 Brain (June05) mlratio" @@ -813,28 +777,28 @@ ] ], "Liver": [ - [ - "UCLA_BHF2_LIVER_MALE", - "UCLA BHF2 Liver Male mlratio" - ], [ "UCLA_BHF2_LIVER_FEMALE", "UCLA BHF2 Liver Female mlratio" ], + [ + "UCLA_BHF2_LIVER_MALE", + "UCLA BHF2 Liver Male mlratio" + ], [ "UCLA_BHF2_LIVER_0605", "UCLA BHF2 Liver (June05) mlratio" ] ], "Muscle": [ - [ - "UCLA_BHF2_MUSCLE_MALE", - "UCLA BHF2 Muscle Male mlratio **" - ], [ "UCLA_BHF2_MUSCLE_FEMALE", "UCLA BHF2 Muscle Female mlratio **" ], + [ + "UCLA_BHF2_MUSCLE_MALE", + "UCLA BHF2 Muscle Male mlratio **" + ], [ "UCLA_BHF2_MUSCLE_0605", "UCLA BHF2 Muscle (June05) mlratio **" @@ -849,6 +813,10 @@ }, "BHHBF2": { "Adipose": [ + [ + "UCLA_BHHBF2_ADIPOSE_2005", + "UCLA BHHBF2 Adipose (2005) mlratio" + ], [ "UCLA_BHHBF2_ADIPOSE_MALE", "UCLA BHHBF2 Adipose Male Only" @@ -856,13 +824,13 @@ [ "UCLA_BHHBF2_ADIPOSE_FEMALE", "UCLA BHHBF2 Adipose Female Only" - ], - [ - "UCLA_BHHBF2_ADIPOSE_2005", - "UCLA BHHBF2 Adipose (2005) mlratio **" ] ], "Brain": [ + [ + "UCLA_BHHBF2_BRAIN_2005", + "UCLA BHHBF2 Brain (2005) mlratio" + ], [ "UCLA_BHHBF2_BRAIN_MALE", "UCLA BHHBF2 Brain Male Only" @@ -870,10 +838,6 @@ [ "UCLA_BHHBF2_BRAIN_FEMALE", "UCLA BHHBF2 Brain Female Only" - ], - [ - "UCLA_BHHBF2_BRAIN_2005", - "UCLA BHHBF2 Brain (2005) mlratio **" ] ], "Genotypes": [ @@ -883,6 +847,10 @@ ] ], "Liver": [ + [ + "UCLA_BHHBF2_LIVER_2005", + "UCLA BHHBF2 Liver (2005) mlratio" + ], [ "UCLA_BHHBF2_LIVER_MALE", "UCLA BHHBF2 Liver Male Only" @@ -890,13 +858,13 @@ [ "UCLA_BHHBF2_LIVER_FEMALE", "UCLA BHHBF2 Liver Female Only" - ], - [ - "UCLA_BHHBF2_LIVER_2005", - "UCLA BHHBF2 Liver (2005) mlratio **" ] ], "Muscle": [ + [ + "UCLA_BHHBF2_MUSCLE_2005", + "UCLA BHHBF2 Muscle (2005) mlratio" + ], [ "UCLA_BHHBF2_MUSCLE_MALE", "UCLA BHHBF2 Muscle Male Only" @@ -904,10 +872,6 @@ [ "UCLA_BHHBF2_MUSCLE_FEMALE", "UCLA BHHBF2 Muscle Female Only" - ], - [ - "UCLA_BHHBF2_MUSCLE_2005", - "UCLA BHHBF2 Muscle (2005) mlratio **" ] ], "Phenotypes": [ @@ -918,6 +882,16 @@ ] }, "BXD": { + "Adrenal Gland": [ + [ + "INIA_Adrenal_RMA_0612", + "INIA Adrenal Affy MoGene 1.0ST (Jun12) RMA" + ], + [ + "INIA_Adrenal_RMA_Ex_0612", + "INIA Adrenal Affy MoGene 1.0ST (Jun12) RMA Exon Level" + ] + ], "Amygdala": [ [ "INIA_AmgCoh_0311", @@ -934,6 +908,10 @@ [ "INIA_Amg_BLA_RMA_F_1110", "INIA Amygdala Affy MoGene 1.0 ST (Nov10) RMA Female" + ], + [ + "INIA_Amg_BLA_Ex-RMA_1110", + "INIA Amygdala Exon Affy MoGene 1.0 ST (Nov10) RMA" ] ], "Brain": [ @@ -945,10 +923,6 @@ "BR_U_1105_P", "UTHSC Brain mRNA U74Av2 (Nov05) PDNN" ], - [ - "BR_U_0805_M", - "UTHSC Brain mRNA U74Av2 (Aug05) MAS5" - ], [ "BR_U_0805_R", "UTHSC Brain mRNA U74Av2 (Aug05) RMA" @@ -957,6 +931,10 @@ "BR_U_0805_P", "UTHSC Brain mRNA U74Av2 (Aug05) PDNN" ], + [ + "BR_U_0805_M", + "UTHSC Brain mRNA U74Av2 (Aug05) MAS5" + ], [ "CB_M_0204_P", "INIA Brain mRNA M430 (Feb04) PDNN" @@ -977,10 +955,6 @@ ] ], "Cerebellum": [ - [ - "CB_M_1004_M", - "SJUT Cerebellum mRNA M430 (Oct04) MAS5" - ], [ "CB_M_1004_R", "SJUT Cerebellum mRNA M430 (Oct04) RMA" @@ -989,6 +963,10 @@ "CB_M_1004_P", "SJUT Cerebellum mRNA M430 (Oct04) PDNN" ], + [ + "CB_M_1004_M", + "SJUT Cerebellum mRNA M430 (Oct04) MAS5" + ], [ "CB_M_1003_M", "SJUT Cerebellum mRNA M430 (Oct03) MAS5" @@ -999,6 +977,10 @@ "Eye_M2_0908_R", "Eye M430v2 (Sep08) RMA" ], + [ + "gn10", + "Eye M430v2 No Mutant/Mutant (Aug12) RMA **" + ], [ "Eye_M2_0908_R_NB", "Eye M430v2 Mutant Gpnmb (Sep08) RMA **" @@ -1007,14 +989,14 @@ "Eye_M2_0908_R_ND", "Eye M430v2 WT Gpnmb (Sep08) RMA **" ], - [ - "Eye_M2_0908_WTWT", - "Eye M430v2 WT WT (Sep08) RMA **" - ], [ "Eye_M2_0908_R_MT", "Eye M430v2 Mutant Tyrp1 (Sep08) RMA **" ], + [ + "Eye_M2_0908_WTWT", + "Eye M430v2 WT WT (Sep08) RMA **" + ], [ "Eye_M2_0908_R_WT", "Eye M430v2 WT Tyrp1 (Sep08) RMA **" @@ -1104,6 +1086,14 @@ [ "UT_ILM_BXD_hipp_RSE_0909", "UTHSC Hippocampus Illumina v6.1 RSE (Sep09) RankInv" + ], + [ + "UTHSC_BXD_HArev3_0912", + "UTHSC BXD Aged Hippocampus rev3 Affy Mouse Gene 1.0 ST (Sep12) RMA **" + ], + [ + "UTHSC_BXD_H_0912", + "UTHSC BXD Aged Hippocampus Affy Mouse Gene 1.0 ST (Sep12) RMA Exon Level **" ] ], "Hypothalamus": [ @@ -1118,6 +1108,10 @@ [ "INIA_Hyp_F_RMA_1110", "INIA Hypothalamus Affy MoGene 1.0 ST (Nov10) Female" + ], + [ + "INIA_Hyp_RMA_Ex-1110", + "INIA Hypothalamus Exon Affy MoGene 1.0 ST (Nov10)" ] ], "Kidney": [ @@ -1137,13 +1131,13 @@ "MA_M2_0806_P", "Mouse Kidney M430v2 Sex Balanced (Aug06) PDNN" ], - [ - "MA_M2_0706_P", - "Mouse Kidney M430v2 (Jul06) PDNN" - ], [ "MA_M2_0706_R", "Mouse Kidney M430v2 (Jul06) RMA" + ], + [ + "MA_M2_0706_P", + "Mouse Kidney M430v2 (Jul06) PDNN" ] ], "Leucocytes": [ @@ -1157,6 +1151,18 @@ "GSE16780_UCLA_ML0911", "GSE16780 UCLA Hybrid MDP Liver Affy HT M430A (Sep11) RMA" ], + [ + "GenEx_BXD_liverEt_M5_0912", + "GenEx BXD EtOH Liver Affy M430 2.0 (Sep12) MAS5 Both Sexes **" + ], + [ + "GenEx_BXD_liverEt_M5F_0912", + "GenEx BXD EtOH Liver Affy M430 2.0 (Sep12) MAS5 Females **" + ], + [ + "GenEx_BXD_liverEt_M5M_0912", + "GenEx BXD EtOH Liver Affy M430 2.0 (Sep12) MAS5 Males **" + ], [ "GenEx_BXD_liverSal_RMA_F_0211", "GenEx BXD Sal Liver Affy M430 2.0 (Feb11) RMA Females **" @@ -1194,18 +1200,22 @@ [ "HZI_0408_M", "HZI Lung M430v2 (Apr08) MAS5" + ], + [ + "HZI_PR8M_Q_0612", + "HZI PR8M-Infected Lungs Agilent4x44 (Apr12) Quantile Females **" ] ], "Midbrain": [ [ - "VUBXDMouseMidBrainQ0212", - "VU BXD Midbrain Agilent SurePrint G3 Mouse GE (Feb12) Quantile" + "VUBXDMouseMidBrainQ0512", + "VU BXD Midbrain Agilent SurePrint G3 Mouse GE (May12) Quantile **" ] ], "Muscle": [ [ "EPFLMouseMuscleRMA1211", - "EPFL/LISP BXD Muscle Affy Mouse Gene 1.0 ST (Dec11) RMA **" + "EPFL/LISP BXD CD+HFD Muscle Affy Mouse Gene 1.0 ST (Dec11) RMA **" ], [ "EPFLMouseMuscleHFDRMA1211", @@ -1218,12 +1228,12 @@ ], "Neocortex": [ [ - "DevNeocortex_ILM6.2P14RInv_1111", - "BIDMC/UTHSC Dev Neocortex P14 ILMv6.2 (Nov11) RankInv **" + "DevNeocortex_ILM6.2P3RInv_1111", + "BIDMC/UTHSC Dev Neocortex P3 ILMv6.2 (Nov11) RankInv" ], [ - "DevNeocortex_ILM6.2P3RInv_1111", - "BIDMC/UTHSC Dev Neocortex P3 ILMv6.2 (Nov11) RankInv **" + "DevNeocortex_ILM6.2P14RInv_1111", + "BIDMC/UTHSC Dev Neocortex P14 ILMv6.2 (Nov11) RankInv" ], [ "HQFNeoc_1210v2_RankInv", @@ -1238,12 +1248,12 @@ "HQF BXD Neocortex ILM6v1.1 (Feb08) RankInv" ], [ - "DevNeocortex_ILM6.2P3RInv_1110", - "BIDMC/UTHSC Dev Neocortex P3 ILMv6.2 (Nov10) RankInv **" + "DevNeocortex_ILM6.2P14RInv_1110", + "BIDMC/UTHSC Dev Neocortex P14 ILMv6.2 (Nov10) RankInv" ], [ - "DevNeocortex_ILM6.2P14RInv_1110", - "BIDMC/UTHSC Dev Neocortex P14 ILMv6.2 (Nov10) RankInv **" + "DevNeocortex_ILM6.2P3RInv_1110", + "BIDMC/UTHSC Dev Neocortex P3 ILMv6.2 (Nov10) RankInv" ] ], "Nucleus Accumbens": [ @@ -1266,11 +1276,23 @@ "BXD Published Phenotypes" ] ], - "Prefrontal Cortex": [ + "Pituitary Gland": [ [ - "VCUEtOH_1206_R", - "VCU BXD PFC EtOH M430 2.0 (Dec06) RMA" + "INIA_PG_RMA_0612", + "INIA Pituitary Affy MoGene 1.0ST (Jun12) RMA" ], + [ + "INIA_PG_RMA_Ex_0612", + "INIA Pituitary Affy MoGene 1.0ST (Jun12) RMA Exon Level" + ] + ], + "Popliteal Lymph Node": [ + [ + "STJ_PLN_0912", + "St Jude BXD Popliteal Lymph Node Affy HT MG-430 PM (Sep12) RMA **" + ] + ], + "Prefrontal Cortex": [ [ "VCUSal_1206_R", "VCU BXD PFC Sal M430 2.0 (Dec06) RMA" @@ -1279,6 +1301,10 @@ "VCUSal_1006_R", "VCU BXD PFC Et vs Sal M430 2.0 (Dec06) Sscore" ], + [ + "VCUEtOH_1206_R", + "VCU BXD PFC EtOH M430 2.0 (Dec06) RMA" + ], [ "VCU_PF_Air_0111_R", "VCU BXD PFC CIE Air M430 2.0 (Jan11) RMA **" @@ -1294,16 +1320,20 @@ ], "Retina": [ [ - "Illum_Retina_BXD_RankInv0410", - "HEI Retina Illumina V6.2 (April 2010) RankInv" + "G2NEI_ILM_Retina_BXD_RI0410", + "Normal HEI Retina (April 2010) RankInv" ], [ - "B6D2ONCILM_0412", - "B6D2 ONC Illumina v6.1 (Apr12) RankInv **" + "Illum_Retina_BXD_RankInv0410", + "Full HEI Retina (April 2010) RankInv" ], [ "ONCRetILM6_0412", - "ONC Retina Illumina V6.2 (Apr12) RankInv **" + "ONC HEI Retina (April 2012) RankInv" + ], + [ + "B6D2ONCILM_0412", + "B6D2 ONC Retina (April 2012) RankInv **" ], [ "HEIONCvsCRetILM6_0911", @@ -1312,26 +1342,6 @@ [ "G2HEIONCRetILM6_0911", "G2 HEI ONC Retina Illumina V6.2 (Sep11) RankInv **" - ], - [ - "HEIONCRetILM6_0911", - "HEI ONC Retina Illumina V6.2 (Sep11) RankInv **" - ], - [ - "ILM_Retina_BXD_F_RankInv1210", - "HEI Retina Females Illumina V6.2 (Dec10) RankInv **" - ], - [ - "ILM_Retina_BXD_M_RankInv1210", - "HEI Retina Males Illumina V6.2 (Dec10) RankInv **" - ], - [ - "ILM_Retina_BXD_FM_RankInv1210", - "HEI Retina F-M Illumina V6.2 (Dec10) RankInv **" - ], - [ - "G2NEI_ILM_Retina_BXD_RI0410", - "G2NEI Retina Illumina V6.2 (April 2010) RankInv **" ] ], "Spleen": [ @@ -1339,10 +1349,22 @@ "UTHSC_SPL_RMA_1210", "UTHSC Affy MoGene 1.0 ST Spleen (Dec10) RMA" ], + [ + "UTHSC_SPL_RMA_1210M", + "UTHSC Affy MoGene 1.0 ST Spleen (Dec10) RMA Males" + ], + [ + "UTHSC_SPL_RMA_1210F", + "UTHSC Affy MoGene 1.0 ST Spleen (Dec10) RMA Females" + ], [ "UTHSC_SPL_RMA_1010", "UTHSC Affy MoGene 1.0 ST Spleen (Oct10) RMA" ], + [ + "UTK_BXDSpl_VST_0110", + "UTK Spleen ILM6.1 (Jan10) VST" + ], [ "IoP_SPL_RMA_0509", "IoP Affy MOE 430v2 Spleen (May09) RMA" @@ -1350,21 +1372,17 @@ [ "Illum_BXD_Spl_1108", "UWA Illumina Spleen (Nov08) RSN **" - ], - [ - "UTK_BXDSpl_VST_0110", - "UTK Spleen ILM6.1 (Jan10) VST" ] ], "Striatum": [ - [ - "DevStriatum_ILM6.2P3RInv_1111", - "BIDMC/UTHSC Dev Striatum P3 ILMv6.2 (Nov11) RankInv **" - ], [ "DevStriatum_ILM6.2P14RInv_1111", "BIDMC/UTHSC Dev Striatum P14 ILMv6.2 (Nov11) RankInv **" ], + [ + "DevStriatum_ILM6.2P3RInv_1111", + "BIDMC/UTHSC Dev Striatum P3 ILMv6.2 (Nov11) RankInv **" + ], [ "UTHSC_Striatum_RankInv_1210", "HQF BXD Striatum ILM6.1 (Dec10v2) RankInv" @@ -1429,10 +1447,6 @@ ] ], "Ventral Tegmental Area": [ - [ - "VCUEtOH_0609_R", - "VCU BXD VTA EtOH M430 2.0 (Jun09) RMA **" - ], [ "VCUSal_0609_R", "VCU BXD VTA Sal M430 2.0 (Jun09) RMA **" @@ -1440,6 +1454,10 @@ [ "VCUEtvsSal_0609_R", "VCU BXD VTA Et vs Sal M430 2.0 (Jun09) Sscore **" + ], + [ + "VCUEtOH_0609_R", + "VCU BXD VTA EtOH M430 2.0 (Jun09) RMA **" ] ] }, @@ -1464,6 +1482,12 @@ "BXH Genotypes" ] ], + "Liver": [ + [ + "GSE16780BXH_UCLA_ML0911", + "GSE16780 UCLA Mouse BXH Liver Affy HT M430A (Sep11) RMA" + ] + ], "Phenotypes": [ [ "BXHPublish", @@ -1474,30 +1498,30 @@ "CTB6F2": { "Adipose": [ [ - "UCLA_CTB6B6CTF2_ADIPOSE_MALE", - "UCLA CTB6B6CTF2 Adipose Male mlratio **" + "UCLA_CTB6B6CTF2_ADIPOSE_2005", + "UCLA CTB6/B6CTF2 Adipose (2005) mlratio" ], [ "UCLA_CTB6B6CTF2_ADIPOSE_FEMALE", "UCLA CTB6B6CTF2 Adipose Female mlratio **" ], [ - "UCLA_CTB6B6CTF2_ADIPOSE_2005", - "UCLA CTB6/B6CTF2 Adipose (2005) mlratio **" + "UCLA_CTB6B6CTF2_ADIPOSE_MALE", + "UCLA CTB6B6CTF2 Adipose Male mlratio **" ] ], "Brain": [ [ - "UCLA_CTB6B6CTF2_BRAIN_MALE", - "UCLA CTB6B6CTF2 Brain Male mlratio **" + "UCLA_CTB6B6CTF2_BRAIN_2005", + "UCLA CTB6/B6CTF2 Brain (2005) mlratio" ], [ "UCLA_CTB6B6CTF2_BRAIN_FEMALE", "UCLA CTB6B6CTF2 Brain Female mlratio **" ], [ - "UCLA_CTB6B6CTF2_BRAIN_2005", - "UCLA CTB6/B6CTF2 Brain (2005) mlratio **" + "UCLA_CTB6B6CTF2_BRAIN_MALE", + "UCLA CTB6B6CTF2 Brain Male mlratio **" ] ], "Genotypes": [ @@ -1508,30 +1532,30 @@ ], "Liver": [ [ - "UCLA_CTB6B6CTF2_LIVER_MALE", - "UCLA CTB6B6CTF2 Liver Male mlratio **" + "UCLA_CTB6B6CTF2_LIVER_2005", + "UCLA CTB6/B6CTF2 Liver (2005) mlratio" ], [ "UCLA_CTB6B6CTF2_LIVER_FEMALE", "UCLA CTB6B6CTF2 Liver Female mlratio **" ], [ - "UCLA_CTB6B6CTF2_LIVER_2005", - "UCLA CTB6/B6CTF2 Liver (2005) mlratio **" + "UCLA_CTB6B6CTF2_LIVER_MALE", + "UCLA CTB6B6CTF2 Liver Male mlratio **" ] ], "Muscle": [ [ - "UCLA_CTB6B6CTF2_MUSCLE_MALE", - "UCLA CTB6B6CTF2 Muscle Male mlratio **" + "UCLA_CTB6B6CTF2_MUSCLE_2005", + "UCLA CTB6/B6CTF2 Muscle (2005) mlratio" ], [ "UCLA_CTB6B6CTF2_MUSCLE_FEMALE", "UCLA CTB6B6CTF2 Muscle Female mlratio **" ], [ - "UCLA_CTB6B6CTF2_MUSCLE_2005", - "UCLA CTB6/B6CTF2 Muscle (2005) mlratio **" + "UCLA_CTB6B6CTF2_MUSCLE_MALE", + "UCLA CTB6B6CTF2 Muscle Male mlratio **" ] ], "Phenotypes": [ @@ -1549,13 +1573,13 @@ ] ], "Hippocampus": [ - [ - "HC_M2CB_1205_R", - "Hippocampus Consortium M430v2 CXB (Dec05) RMA" - ], [ "HC_M2CB_1205_P", "Hippocampus Consortium M430v2 CXB (Dec05) PDNN" + ], + [ + "HC_M2CB_1205_R", + "Hippocampus Consortium M430v2 CXB (Dec05) RMA" ] ], "Phenotypes": [ @@ -1659,10 +1683,6 @@ "Hipp_Illumina_RankInv_0507", "Hippocampus Illumina (May07) RankInv" ], - [ - "Illum_LXS_Hipp_NON_1008", - "Hippocampus Illumina NON (Oct08) RankInv beta" - ], [ "Illum_LXS_Hipp_RSE_1008", "Hippocampus Illumina RSE (Oct08) RankInv beta" @@ -1678,6 +1698,10 @@ [ "Illum_LXS_Hipp_NOS_1008", "Hippocampus Illumina NOS (Oct08) RankInv beta" + ], + [ + "Illum_LXS_Hipp_NON_1008", + "Hippocampus Illumina NON (Oct08) RankInv beta" ] ], "Phenotypes": [ @@ -1687,10 +1711,6 @@ ] ], "Prefrontal Cortex": [ - [ - "VCUEtOH_0806_R", - "VCU LXS PFC EtOH M430A 2.0 (Aug06) RMA **" - ], [ "VCUSal_0806_R", "VCU LXS PFC Sal M430A 2.0 (Aug06) RMA" @@ -1698,6 +1718,10 @@ [ "VCUEt_vs_Sal_0806_R", "VCU LXS PFC Et vs Sal M430A 2.0 (Aug06) Sscore **" + ], + [ + "VCUEtOH_0806_R", + "VCU LXS PFC EtOH M430A 2.0 (Aug06) RMA **" ] ] }, @@ -1709,16 +1733,20 @@ ] ], "Hippocampus": [ - [ - "UMUTAffyExon_0209_RMA_MDP", - "UMUTAffy Hippocampus Exon (Feb09) RMA MDP" - ], [ "HC_M2_0606_MDP", "Hippocampus Consortium M430v2 (Jun06) RMA MDP" + ], + [ + "UMUTAffyExon_0209_RMA_MDP", + "UMUTAffy Hippocampus Exon (Feb09) RMA MDP" ] ], "Liver": [ + [ + "GSE16780MDP_UCLA_ML0911", + "GSE16780 UCLA Mouse MDP Liver Affy HT M430A (Sep11) RMA" + ], [ "JAX_CSB_L_0711", "JAX Liver Affy M430 2.0 (Jul11) MDP" @@ -1921,19 +1949,19 @@ "human": [ [ "AD-cases-controls", - "AD Cases & Controls (Liang)" + "Alzheimer's Disease Brain (Liang)" ], [ "AD-cases-controls-Myers", - "AD Cases & Controls (Myers)" + "Alzheimer's Disease Brain (Myers)" ], [ "CANDLE", - "CANDLE" + "CANDLE Cognitive Development (TUCI)" ], [ "CEPH-2004", - "CEPH Families" + "CEPH Families Cell Lines" ], [ "HB", @@ -1941,11 +1969,11 @@ ], [ "HLC", - "Human Liver Cohort" + "Human Liver Cohort (Merck)" ], [ "HSB", - "KIN/YSM" + "Human Brain Transcriptome (Yale/Kavli)" ] ], "macaque monkey": [ @@ -1971,6 +1999,14 @@ "B6D2F2", "B6D2F2" ], + [ + "B6D2F2-PSU", + "B6D2F2 PSU" + ], + [ + "B6D2RI", + "B6D2RI Aged" + ], [ "BDF2-1999", "BDF2 UCLA" @@ -2300,30 +2336,14 @@ "Amygdala", "Amygdala mRNA" ], - [ - "Caudal Ganglionic Eminence", - "Caudal Ganglionic Eminence mRNA" - ], [ "Cerebellar Cortex", "Cerebellar Cortex mRNA" ], - [ - "Diencephalon", - "Diencephalon mRNA" - ], - [ - "Dorsal Thalamus", - "Dorsal Thalamus mRNA" - ], [ "Dorsolateral Prefrontal Cortex", "Dorsolateral Prefrontal Cortex mRNA" ], - [ - "Frontal Cerebral Wall", - "Frontal Cerebral Wall mRNA" - ], [ "Hippocampus", "Hippocampus mRNA" @@ -2332,14 +2352,6 @@ "Inferior Temporal Cortex", "Inferior Temporal Cortex mRNA" ], - [ - "Lateral Ganglionic Eminence", - "Lateral Ganglionic Eminence mRNA" - ], - [ - "Medial Ganglionic Eminence", - "Medial Ganglionic Eminence mRNA" - ], [ "Medial Prefrontal Cortex", "Medial Prefrontal Cortex mRNA" @@ -2348,18 +2360,10 @@ "Mediodorsal Nucleus of Thalamus", "Mediodorsal Nucleus of Thalamus mRNA" ], - [ - "Occipital Cerebral Wall", - "Occipital Cerebral Wall mRNA" - ], [ "Orbital Prefrontal Cortex", "Orbital Prefrontal Cortex mRNA" ], - [ - "Parietal Cerebral Wall", - "Parietal Cerebral Wall mRNA" - ], [ "Posterior Inferior Parietal Cortex", "Posterior Inferior Parietal Cortex mRNA" @@ -2388,18 +2392,6 @@ "Striatum", "Striatum mRNA" ], - [ - "Temporal Cerebral Wall", - "Temporal Cerebral Wall mRNA" - ], - [ - "Upper (Rostral) Rhombic Lip", - "Upper (Rostral) Rhombic Lip mRNA" - ], - [ - "Ventral Forebrain", - "Ventral Forebrain mRNA" - ], [ "Ventrolateral Prefrontal Cortex", "Ventrolateral Prefrontal Cortex mRNA" @@ -2465,6 +2457,10 @@ [ "Eye", "Eye mRNA" + ], + [ + "Liver", + "Liver mRNA" ] ], "B6BTBRF2": [ @@ -2495,6 +2491,34 @@ "Brain mRNA" ] ], + "B6D2F2-PSU": [ + [ + "Phenotypes", + "Phenotypes" + ], + [ + "Genotypes", + "Genotypes" + ], + [ + "Muscle", + "Muscle mRNA" + ] + ], + "B6D2RI": [ + [ + "Phenotypes", + "Phenotypes" + ], + [ + "Genotypes", + "Genotypes" + ], + [ + "Hippocampus", + "Hippocampus mRNA" + ] + ], "BDF2-1999": [ [ "Phenotypes", @@ -2584,6 +2608,10 @@ "Genotypes", "Genotypes" ], + [ + "Adrenal Gland", + "Adrenal Gland mRNA" + ], [ "Amygdala", "Amygdala mRNA" @@ -2648,6 +2676,14 @@ "Nucleus Accumbens", "Nucleus Accumbens mRNA" ], + [ + "Pituitary Gland", + "Pituitary Gland mRNA" + ], + [ + "Popliteal Lymph Node", + "Popliteal Lymph Node mRNA" + ], [ "Prefrontal Cortex", "Prefrontal Cortex mRNA" @@ -2693,6 +2729,10 @@ [ "Cartilage", "Cartilage mRNA" + ], + [ + "Liver", + "Liver mRNA" ] ], "CTB6F2": [ -- cgit v1.2.3 From 51db16394ebe5936a2078293c676744b7ea74fc6 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 7 Mar 2013 23:42:58 +0000 Subject: Progress bar is now completely working Still need to figure out the problem that occurred with negative p-values after I refactored the LMM code --- wqflask/base/data_set.py | 1 + .../wqflask/marker_regression/marker_regression.py | 3 +- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 203 ++++++++++++++++----- .../new/javascript/show_trait_mapping_tools.coffee | 1 - .../new/javascript/show_trait_mapping_tools.js | 70 +------ .../wqflask/templates/show_trait_progress_bar.html | 2 +- 6 files changed, 159 insertions(+), 121 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index c6d67e68..d474302c 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -75,6 +75,7 @@ class Markers(object): def add_pvalues(self, p_values): for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value + print("p_value is:", marker['p_value']) marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 412d9e35..4ddc89c6 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -497,7 +497,8 @@ class MarkerRegression(object): genotype_matrix, kinship_matrix, REML=True, - refit=False) + refit=False, + temp_data=self.temp_data) Bench().report() diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index cc2e32a7..12f7c2ea 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -95,53 +95,53 @@ def calculate_kinship(genotype_matrix, temp_data): kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m) return kinship_matrix -def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): - """ - Performs a basic GWAS scan using the LMM. This function - uses the LMM module to assess association at each SNP and - does some simple cleanup, such as removing missing individuals - per SNP and re-computing the eigen-decomp - - Y - n x 1 phenotype vector - X - n x m SNP matrix - K - n x n kinship matrix - Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K - X0 - n x q covariate matrix - REML - use restricted maximum likelihood - refit - refit the variance component for each SNP - """ - n = X.shape[0] - m = X.shape[1] +def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False, temp_data=None): + """ + Performs a basic GWAS scan using the LMM. This function + uses the LMM module to assess association at each SNP and + does some simple cleanup, such as removing missing individuals + per SNP and re-computing the eigen-decomp + + Y - n x 1 phenotype vector + X - n x m SNP matrix + K - n x n kinship matrix + Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K + X0 - n x q covariate matrix + REML - use restricted maximum likelihood + refit - refit the variance component for each SNP + """ + n = X.shape[0] + m = X.shape[1] - if X0 == None: X0 = np.ones((n,1)) - - # Remove missing values in Y and adjust associated parameters - v = np.isnan(Y) - if v.sum(): - keep = True - v - Y = Y[keep] - X = X[keep,:] - X0 = X0[keep,:] - K = K[keep,:][:,keep] - Kva = [] - Kve = [] + if X0 == None: X0 = np.ones((n,1)) - L = LMM(Y,K,Kva,Kve,X0) - if not refit: L.fit() + # Remove missing values in Y and adjust associated parameters + v = np.isnan(Y) + if v.sum(): + keep = True - v + Y = Y[keep] + X = X[keep,:] + X0 = X0[keep,:] + K = K[keep,:][:,keep] + Kva = [] + Kve = [] - PS = [] - TS = [] + L = LMM(Y,K,Kva,Kve,X0) + if not refit: L.fit() - for i in range(m): - x = X[:,i].reshape((n,1)) - v = np.isnan(x).reshape((-1,)) - if v.sum(): + PS = [] + TS = [] + + for counter in range(m): + x = X[:,counter].reshape((n,1)) + v = np.isnan(x).reshape((-1,)) + if v.sum(): keep = True - v xs = x[keep,:] - if xs.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue + if xs.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue Ys = Y[keep] X0s = X0[keep,:] @@ -150,19 +150,124 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): if refit: Ls.fit(X=xs) else: Ls.fit() ts,ps = Ls.association(xs,REML=REML) - else: - if x.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue + else: + if x.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue if refit: L.fit(X=x) ts,ps = L.association(x,REML=REML) - PS.append(ps) - TS.append(ts) + percent_complete = 45 + int(round((counter/m)*55)) + print("Percent complete: ", percent_complete) + temp_data.store("percent_complete", percent_complete) - return TS,PS + PS.append(ps) + TS.append(ts) + + return TS,PS + +#def GWAS(pheno_vector, +# genotype_matrix, +# kinship_matrix, +# kinship_eigenvals=None, +# kinship_eigenvectors=None, +# covariate_matrix=None, +# restricted_max_likelihood=True, +# refit=False, +# temp_data=None): +# """ +# Performs a basic GWAS scan using the LMM. This function +# uses the LMM module to assess association at each SNP and +# does some simple cleanup, such as removing missing individuals +# per SNP and re-computing the eigen-decomp +# +# Y - n x 1 phenotype vector +# X - n x m SNP matrix +# K - n x n kinship matrix +# Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K +# X0 - n x q covariate matrix +# REML - use restricted maximum likelihood +# refit - refit the variance component for each SNP +# +# """ +# +# assert temp_data, "You forgot to pass in temp_data" +# +# if kinship_eigenvals == None: +# kinship_eigenvals = [] +# if kinship_eigenvectors == None: +# kinship_eigenvectors = [] +# +# n = genotype_matrix.shape[0] +# m = genotype_matrix.shape[1] +# +# if covariate_matrix == None: +# covariate_matrix = np.ones((n,1)) +# +# # Remove missing values in Y and adjust associated parameters +# pheno_not_number = np.isnan(pheno_vector) +# if pheno_not_number.sum(): +# keep = True - pheno_not_number +# pheno_vector = pheno_vector[keep] +# genotype_matrix = genotype_matrix[keep,:] +# covariate_matrix = covariate_matrix[keep,:] +# kinship_matrix = kinship_matrix[keep,:][:,keep] +# kinship_eigenvals = [] +# kinship_eigenvectors = [] +# +# lmm_ob = LMM(pheno_vector, +# kinship_matrix, +# kinship_eigenvals, +# kinship_eigenvectors, +# covariate_matrix) +# if not refit: +# lmm_ob.fit() +# +# p_value_matrix = [] +# t_stats_matrix = [] +# +# for counter in range(m): +# #pheno_vector_2 = geno_vector[:, counter] +# #x = pheno_vector_2.reshape((n,1)) +# x = genotype_matrix[:,counter].reshape((n,1)) +# v = np.isnan(x).reshape((-1,)) +# if v.sum(): +# keep = True - v +# xs = x[keep,:] +# if xs.var() == 0: +# p_value_matrix.append(np.nan) +# t_stats_matrix.append(np.nan) +# continue +# +# pheno_vector_2 = pheno_vector[keep] +# covariate_matrix_2 = covariate_matrix[keep,:] +# kinship_matrix_2 = kinship_matrix[keep,:][:,keep] +# lmm_ob_2 = LMM(pheno_vector, kinship_matrix, covariate_matrix=covariate_matrix) +# if refit: +# lmm_ob_2.fit(X=xs) +# else: +# lmm_ob_2.fit() +# t_stats, p_values = lmm_ob_2.association(xs, REML=restricted_max_likelihood) +# else: +# if x.var() == 0: +# p_value_matrix.append(np.nan) +# t_stats_matrix.append(np.nan) +# continue +# +# if refit: +# lmm_ob.fit(X=x) +# t_stats,p_values = lmm_ob.association(x, REML=restricted_max_likelihood) +# +# p_value_matrix.append(p_values) +# t_stats_matrix.append(t_stats) +# +# percent_complete = 45 + int(round((counter/m)*55)) +# print("Percent complete: ", percent_complete) +# temp_data.store("percent_complete", percent_complete) +# +# return p_value_matrix, t_stats_matrix class LMM: diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee index 35572f67..157f56a9 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee @@ -30,7 +30,6 @@ $ -> $("#marker_regression").click(() => $("#progress_bar_container").modal() - url = "/marker_regression" form_data = $('#trait_data_form').serialize() console.log("form_data is:", form_data) diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index 78459692..c8b0aa7b 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -1,74 +1,6 @@ // Generated by CoffeeScript 1.4.0 (function() { - $(function() { - var composite_mapping_fields, get_progress, submit_special, toggle_enable_disable, - _this = this; - submit_special = function() { - var url; - console.log("In submit_special"); - console.log("this is:", this); - console.log("$(this) is:", $(this)); - url = $(this).data("url"); - console.log("url is:", url); - $("#trait_data_form").attr("action", url); - return $("#trait_data_form").submit(); - }; - get_progress = function() { - var params, params_str, temp_uuid, url, - _this = this; - console.log("temp_uuid:", $("#temp_uuid").val()); - temp_uuid = $("#temp_uuid").val(); - params = { - key: temp_uuid - }; - params_str = $.param(params); - url = "/get_temp_data?" + params_str; - console.log("url:", url); - $.ajax({ - type: "GET", - url: url, - success: function(progress_data) { - console.log("in get_progress data:", progress_data); - console.log(progress_data['percent_complete'] + "%"); - return $('#marker_regression_progress').css("width", progress_data['percent_complete'] + "%"); - } - }); - return false; - }; - $("#marker_regression").click(function() { - var form_data, url; - $("#progress_bar_container").modal(); - url = "/marker_regression"; - form_data = $('#trait_data_form').serialize(); - console.log("form_data is:", form_data); - $.ajax({ - type: "POST", - url: url, - data: form_data, - success: function(data) { - clearInterval(_this.my_timer); - $('#progress_bar_container').modal('hide'); - return $("body").html(data); - } - }); - console.log("settingInterval"); - _this.my_timer = setInterval(get_progress, 1000); - return false; - }); - composite_mapping_fields = function() { - return $(".composite_fields").toggle(); - }; - $("#use_composite_choice").change(composite_mapping_fields); - toggle_enable_disable = function(elem) { - return $(elem).prop("disabled", !$(elem).prop("disabled")); - }; - $("#choose_closet_control").change(function() { - return toggle_enable_disable("#control_locus"); - }); - return $("#display_all_lrs").change(function() { - return toggle_enable_disable("#suggestive_lrs"); - }); - }); + }).call(this); diff --git a/wqflask/wqflask/templates/show_trait_progress_bar.html b/wqflask/wqflask/templates/show_trait_progress_bar.html index 2984cc02..eff5c391 100644 --- a/wqflask/wqflask/templates/show_trait_progress_bar.html +++ b/wqflask/wqflask/templates/show_trait_progress_bar.html @@ -3,7 +3,7 @@

Loading...

- - - {% if dataset.group.genotype.Mbmap %} -
- -
- - -
-
- {% endif %} +
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index c8432877..46433430 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -31,6 +31,7 @@ from utility import temp_data from wqflask.dataSharing import SharingInfo, SharingInfoPage from base import webqtlFormData +from utility.benchmark import Bench from pprint import pformat as pf @@ -168,25 +169,18 @@ def marker_regression_page(): start_vars[key] = value version = "v5" - print("version is:", version) key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True) - result = Redis.get(key) + with Bench("Loading cache"): + result = Redis.get(key) - print("************************ Starting result *****************") + #print("************************ Starting result *****************") #print("result is [{}]: {}".format(type(result), result)) - print("************************ Ending result ********************") + #print("************************ Ending result ********************") if result: - with open("/tmp/result", "w") as fh: - fh.write(result) print("Cache hit!!!") - import __builtin__ - import reaper - __builtin__.Dataset = reaper.Dataset - #result = yaml.load(result) - result = pickle.loads(result) - print("Done loading yaml") - + with Bench("Loading results"): + result = pickle.loads(result) else: print("Cache miss!!!") template_vars = marker_regression.MarkerRegression(start_vars, temp_uuid) @@ -197,14 +191,17 @@ def marker_regression_page(): result = template_vars.__dict__ - for item in template_vars.__dict__.keys(): - print(" ---**--- {}: {}".format(type(item), item)) + #for item in template_vars.__dict__.keys(): + # print(" ---**--- {}: {}".format(type(template_vars.__dict__[item]), item)) #causeerror Redis.set(key, pickle.dumps(result)) Redis.expire(key, 60*60) + + with Bench("Rendering template"): + rendered_template = render_template("marker_regression.html", **result) - return render_template("marker_regression.html", **result) + return rendered_template @app.route("/corr_compute", methods=('POST',)) -- cgit v1.2.3 From b316d0a842ca83049aeb8e374db70831bcb4a928 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 12 Mar 2013 18:35:47 -0500 Subject: Added the script generating probesetfreeze data matrix files The script was created to regenerate a bad file for Evan --- wqflask/base/generate_probesetfreeze_file.py | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 wqflask/base/generate_probesetfreeze_file.py (limited to 'wqflask/base') diff --git a/wqflask/base/generate_probesetfreeze_file.py b/wqflask/base/generate_probesetfreeze_file.py new file mode 100644 index 00000000..a0ff804b --- /dev/null +++ b/wqflask/base/generate_probesetfreeze_file.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import, print_function, division +import os +import math + +import json +import itertools + +from flask import Flask, g + +from base import webqtlConfig +from dbFunction import webqtlDatabaseFunction +from utility import webqtlUtil + +from MySQLdb import escape_string as escape +from pprint import pformat as pf + + +query = """ select ProbeSet.Name + from ProbeSetXRef, + ProbeSetFreeze, + ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and + ProbeSetFreeze.Name = "EPFLMouseMuscleCDRMA1211" and + ProbeSetXRef.ProbeSetId = ProbeSet.Id; + """ + +markers = g.db.execute(query).fetchall() +print("markers: ", pf(markers)) + +if __name__ == '__main__': + main() \ No newline at end of file -- cgit v1.2.3 From 45cefba200ad7d0d39495aaf71de217bda3e41a3 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 19 Mar 2013 20:09:58 +0000 Subject: Searches work for the full access human datasets with mRNA assay searches, and the trait page can be loaded Need to get marker regression page working with human data now --- wqflask/base/data_set.py | 5 ++--- .../wqflask/marker_regression/marker_regression.py | 3 +++ wqflask/wqflask/search_results.py | 22 ++++++++++++++++++---- wqflask/wqflask/show_trait/show_trait.py | 4 +++- wqflask/wqflask/templates/index_page.html | 4 ++-- 5 files changed, 28 insertions(+), 10 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 10f047f8..71efc9b2 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -185,9 +185,8 @@ class DataSet(object): self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype self.species = species.TheSpecies(self) - - - + + def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" return None diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 9278c80d..1a0efcee 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -65,6 +65,9 @@ class MarkerRegression(object): pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) genotype_matrix = np.array(trimmed_genotype_data).T + print("pheno_vector is: ", pf(pheno_vector)) + print("genotype_matrix is: ", pf(genotype_matrix)) + t_stats, p_values = lmm.run( pheno_vector, genotype_matrix, diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index fde30944..080568f0 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -57,15 +57,18 @@ class SearchResultPage(): # self.dataset_groups = map(lambda x: x[1], results) # self.dataset_group_ids = map(lambda x: x[2], results) #else: - if kw['q']: + + self.results = [] + + if 'q' in kw: self.quick_search = True self.search_terms = kw['q'] + self.quick_search() else: self.quick_search = False self.search_terms = kw['search_terms'] self.dataset = create_dataset(kw['dataset']) - - self.search() + self.search() self.gen_search_result() @@ -95,13 +98,24 @@ class SearchResultPage(): self.trait_list.append(this_trait) self.dataset.get_trait_info(self.trait_list, species) + + def quick_search(self): + + return True + + #search_gene + #search_geno + #searhch_pheno + #search_mrn + #searhc_publish def search(self): self.search_terms = parser.parse(self.search_terms) print("After parsing:", self.search_terms) - self.results = [] + + for a_search in self.search_terms: print("[kodak] item is:", pf(a_search)) search_term = a_search['search_term'] diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 12d512b2..5c064359 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -1552,8 +1552,10 @@ class ShowTrait(object): all_samples_ordered = (self.dataset.group.parlist + self.dataset.group.f1list + self.dataset.group.samplelist) - else: + elif self.dataset.group.f1list: all_samples_ordered = self.dataset.group.f1list + self.dataset.group.samplelist + else: + all_samples_ordered = self.dataset.group.samplelist this_trait_samples = set(this_trait.data.keys()) diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index 8a05173f..46a307b7 100644 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -34,10 +34,10 @@
- +
:]]" - # - # # This adds a clause to the query that matches the search term - # # against each field in the search_fields tuple - # fields_clause = [] - # for field in self.search_fields: - # fields_clause.append('''%s REGEXP "%s"''' % (field, search_term)) - # fields_clause = "(%s) and " % ' OR '.join(fields_clause) - # - # return fields_clause - def get_fields_clause(self): """Generate clause for WHERE portion of query""" @@ -231,13 +200,13 @@ class PhenotypeSearch(DoSearch): # This adds a clause to the query that matches the search term # against each field in the search_fields tuple - fields_clause = "MATCH(" - fields_clause += ",".join(self.search_fields) + ") " - fields_clause += "AGAINST('{}' IN BOOLEAN MODE)".format(self.search_term[0]) + fields_clause = [] + for field in self.search_fields: + fields_clause.append('''%s REGEXP "%s"''' % (field, search_term)) + fields_clause = "(%s) and " % ' OR '.join(fields_clause) return fields_clause - def compile_final_query(self, from_clause = '', where_clause = ''): """Generates the final query string""" @@ -265,61 +234,56 @@ class PhenotypeSearch(DoSearch): query = self.compile_final_query(where_clause = self.get_fields_clause()) return self.execute(query) - - -class QuickPhenotypeSearch(PhenotypeSearch): - """A search across all phenotype datasets""" - - DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch" - - base_query = """SELECT Species.Name as Species_Name, - PublishFreeze.FullName as Dataset_Name, - PublishFreeze.Name, - PublishXRef.Id, - PublishFreeze.createtime as thistable, - Publication.PubMed_ID as Publication_PubMed_ID, - Phenotype.Post_publication_description as Phenotype_Name - FROM Phenotype, - PublishFreeze, - Publication, - PublishXRef, - InbredSet, - Species """ - - search_fields = ('Phenotype.Post_publication_description', - 'Phenotype.Pre_publication_description', - 'Phenotype.Pre_publication_abbreviation', - 'Phenotype.Post_publication_abbreviation', - 'Phenotype.Lab_code', - 'Publication.PubMed_ID', - 'Publication.Abstract', - 'Publication.Title', - 'Publication.Authors') - - def compile_final_query(self, where_clause = ''): - """Generates the final query string""" - query = (self.base_query + - """WHERE (%s) and - PublishXRef.PhenotypeId = Phenotype.Id and - PublishXRef.PublicationId = Publication.Id and - PublishXRef.InbredSetId = InbredSet.Id and - InbredSet.SpeciesId = Species.Id""" % where_clause) - - print("query is:", pf(query)) - - return query +#class QuickPhenotypeSearch(PhenotypeSearch): +# """A search across all phenotype datasets""" +# +# DoSearch.search_types['quick_phenotype'] = "QuickPhenotypeSearch" +# +# base_query = """SELECT Species.Name as Species_Name, +# PublishFreeze.FullName as Dataset_Name, +# PublishFreeze.Name, +# PublishXRef.Id, +# PublishFreeze.createtime as thistable, +# Publication.PubMed_ID as Publication_PubMed_ID, +# Phenotype.Post_publication_description as Phenotype_Name +# FROM Phenotype, +# PublishFreeze, +# Publication, +# PublishXRef, +# InbredSet, +# Species """ +# +# search_fields = ('Phenotype.Post_publication_description', +# 'Phenotype.Pre_publication_description', +# 'Phenotype.Pre_publication_abbreviation', +# 'Phenotype.Post_publication_abbreviation', +# 'Phenotype.Lab_code', +# 'Publication.PubMed_ID', +# 'Publication.Abstract', +# 'Publication.Title', +# 'Publication.Authors') +# +# def compile_final_query(self, where_clause = ''): +# """Generates the final query string""" +# +# query = (self.base_query + +# """WHERE %s +# PublishXRef.PhenotypeId = Phenotype.Id and +# PublishXRef.PublicationId = Publication.Id and +# PublishXRef.InbredSetId = InbredSet.Id and +# InbredSet.SpeciesId = Species.Id""" % where_clause) +# +# print("query is:", pf(query)) +# +# return query def run(self): """Generates and runs a search across all phenotype datasets""" query = self.compile_final_query(where_clause = self.get_fields_clause()) - with Bench("Doing quick phenotype search"): - results = self.execute(query) - - return results - + return self.execute(query) class GenotypeSearch(DoSearch): """A search within a genotype dataset""" diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 6c85afe9..c3555e8f 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -6,23 +6,30 @@ from base import data_set #import create_dataset from pprint import pformat as pf import string +import sys import os import collections import numpy as np +from scipy import linalg #from redis import Redis -from utility import Plot, Bunch + from base.trait import GeneralTrait from base import data_set from base import species -from utility import helper_functions from base import webqtlConfig from wqflask.my_pylmm.data import prep_data from wqflask.my_pylmm.pyLMM import lmm +from wqflask.my_pylmm.pyLMM import input +from utility import helper_functions +from utility import Plot, Bunch from utility import temp_data +from utility.benchmark import Bench + + class MarkerRegression(object): def __init__(self, start_vars, temp_uuid): @@ -52,28 +59,135 @@ class MarkerRegression(object): ) + def gen_data(self, tempdata): """Generates p-values for each marker""" - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - no_val_samples = self.identify_empty_samples() - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - genotype_matrix = np.array(trimmed_genotype_data).T - print("pheno_vector is: ", pf(pheno_vector)) - print("genotype_matrix is: ", pf(genotype_matrix)) + file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) + + plink_input = input.plink(file_base, type='b') + + + pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) + pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) + covariate_matrix = np.ones((pheno_vector.shape[0],1)) + kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ") + kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs))) + + refit = False + + v = np.isnan(pheno_vector) + keep = True - v + keep = keep.reshape((len(keep),)) + eigen_values = [] + eigen_vectors = [] + + + print("pheno_vector shape is: ", pf(pheno_vector.shape)) + + #print("pheno_vector is: ", pf(pheno_vector)) + #print("kinship_matrix is: ", pf(kinship_matrix)) + + if v.sum(): + pheno_vector = pheno_vector[keep] + print("pheno_vector shape is now: ", pf(pheno_vector.shape)) + covariate_matrix = covariate_matrix[keep,:] + print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) + print("len(keep) is: ", pf(keep.shape)) + kinship_matrix = kinship_matrix[keep,:][:,keep] + + #if not v.sum(): + # eigen_values = np.fromfile(file_base + ".kin.kva") + # eigen_vectors = np.fromfile(file_base + ".kin.kve") + + #print("eigen_values is: ", pf(eigen_values)) + #print("eigen_vectors is: ", pf(eigen_vectors)) + + n = kinship_matrix.shape[0] + lmm_ob = lmm.LMM(pheno_vector, + kinship_matrix, + eigen_values, + eigen_vectors, + covariate_matrix) + lmm_ob.fit() + + # Buffers for pvalues and t-stats + p_values = [] + t_statistics = [] + count = 0 + + plink_input.getSNPIterator() + print("# snps is: ", pf(plink_input.numSNPs)) + with Bench("snp iterator loop"): + for snp, this_id in plink_input: + #if count > 10000: + # break + count += 1 + + x = snp[keep].reshape((n,1)) + #x[[1,50,100,200,3000],:] = np.nan + v = np.isnan(x).reshape((-1,)) + + # Check SNPs for missing values + if v.sum(): + keeps = True - v + xs = x[keeps,:] + # If no variation at this snp or all genotypes missing + if keeps.sum() <= 1 or xs.var() <= 1e-6: + p_values.append(np.nan) + t_statistics.append(np.nan) + continue + + # Its ok to center the genotype - I used options.normalizeGenotype to + # force the removal of missing genotypes as opposed to replacing them with MAF. + + #if not options.normalizeGenotype: + # xs = (xs - xs.mean()) / np.sqrt(xs.var()) + + filtered_pheno = pheno_vector[keeps] + filtered_covariate_matrix = covariate_matrix[keeps,:] + filtered_kinship_matrix = kinship_matrix[keeps,:][:,keeps] + filtered_lmm_ob = lmm.LMM(filtered_pheno,filtered_kinship_matrix,X0=filtered_covariate_matrix) + if refit: + filtered_lmm_ob.fit(X=xs) + else: + #try: + filtered_lmm_ob.fit() + #except: pdb.set_trace() + ts,ps,beta,betaVar = Ls.association(xs,returnBeta=True) + else: + if x.var() == 0: + p_values.append(np.nan) + t_statistics.append(np.nan) + continue + + if refit: + lmm_ob.fit(X=x) + ts,ps,beta,betaVar = lmm_ob.association(x) + p_values.append(ps) + t_statistics.append(ts) + - t_stats, p_values = lmm.run( - pheno_vector, - genotype_matrix, - restricted_max_likelihood=True, - refit=False, - temp_data=tempdata - ) + #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] + # + #no_val_samples = self.identify_empty_samples() + #trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + # + #genotype_matrix = np.array(trimmed_genotype_data).T + # + #print("pheno_vector is: ", pf(pheno_vector)) + #print("genotype_matrix is: ", pf(genotype_matrix)) + # + #t_stats, p_values = lmm.run( + # pheno_vector, + # genotype_matrix, + # restricted_max_likelihood=True, + # refit=False, + # temp_data=tempdata + #) + print("p_values is: ", pf(p_values)) self.dataset.group.markers.add_pvalues(p_values) #self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] @@ -118,3 +232,5 @@ class MarkerRegression(object): new_genotypes.append(genotype) trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data + + diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py index b8b76fd0..35662072 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/input.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py @@ -41,7 +41,8 @@ class plink: # the programmer to turn off the kinship reading. self.readKFile = readKFile - if self.kFile: self.K = self.readKinship(self.kFile) + if self.kFile: + self.K = self.readKinship(self.kFile) elif os.path.isfile("%s.kin" % fbase): self.kFile = "%s.kin" %fbase if self.readKFile: @@ -54,7 +55,7 @@ class plink: self.fhandle = None self.snpFileHandle = None - + def __del__(self): if self.fhandle: self.fhandle.close() if self.snpFileHandle: self.snpFileHandle.close() @@ -160,7 +161,8 @@ class plink: # reorder to match self.indivs D = {} L = [] - for i in range(len(keys)): D[keys[i]] = i + for i in range(len(keys)): + D[keys[i]] = i for i in range(len(self.indivs)): if not D.has_key(self.indivs[i]): continue diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 163b876a..f1f195d6 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -26,42 +26,42 @@ from scipy import stats from pprint import pformat as pf -from utility.benchmark import Bench - -#np.seterr('raise') - -def run(pheno_vector, - genotype_matrix, - restricted_max_likelihood=True, - refit=False, - temp_data=None): - """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics - - restricted_max_likelihood -- whether to use restricted max likelihood; True or False - refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) - - """ - - with Bench("Calculate Kinship"): - kinship_matrix = calculate_kinship(genotype_matrix, temp_data) - - with Bench("Create LMM object"): - lmm_ob = LMM(pheno_vector, kinship_matrix) - - with Bench("LMM_ob fitting"): - lmm_ob.fit() - - with Bench("Doing GWAS"): - t_stats, p_values = GWAS(pheno_vector, - genotype_matrix, - kinship_matrix, - restricted_max_likelihood=True, - refit=False, - temp_data=temp_data) - Bench().report() - return t_stats, p_values +#from utility.benchmark import Bench +# +##np.seterr('raise') +# +#def run(pheno_vector, +# genotype_matrix, +# restricted_max_likelihood=True, +# refit=False, +# temp_data=None): +# """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics +# +# restricted_max_likelihood -- whether to use restricted max likelihood; True or False +# refit -- whether to refit the variance component for each marker +# temp_data -- TempData object that stores the progress for each major step of the +# calculations ("calculate_kinship" and "GWAS" take the majority of time) +# +# """ +# +# with Bench("Calculate Kinship"): +# kinship_matrix = calculate_kinship(genotype_matrix, temp_data) +# +# with Bench("Create LMM object"): +# lmm_ob = LMM(pheno_vector, kinship_matrix) +# +# with Bench("LMM_ob fitting"): +# lmm_ob.fit() +# +# with Bench("Doing GWAS"): +# t_stats, p_values = GWAS(pheno_vector, +# genotype_matrix, +# kinship_matrix, +# restricted_max_likelihood=True, +# refit=False, +# temp_data=temp_data) +# Bench().report() +# return t_stats, p_values def matrixMult(A,B): @@ -72,8 +72,8 @@ def matrixMult(A,B): except AttributeError: return np.dot(A,B) - print("A is:", pf(A.shape)) - print("B is:", pf(B.shape)) + #print("A is:", pf(A.shape)) + #print("B is:", pf(B.shape)) # If the matrices are in Fortran order then the computations will be faster # when using dgemm. Otherwise, the function will copy the matrix and that takes time. @@ -234,237 +234,245 @@ def GWAS(pheno_vector, class LMM: - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. - - """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): - - """ - The constructor takes a phenotype vector or array of size n. - It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. - """ - - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) - self.verbose = verbose - - #x = Y != -9 - x = True - np.isnan(Y) - if not x.sum() == len(Y): - if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) - Y = Y[x] - K = K[x,:][:,x] - X0 = X0[x,:] - Kva = [] - Kve = [] - self.nonmissing = x - - if len(Kva) == 0 or len(Kve) == 0: - if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - Kva,Kve = linalg.eigh(K) - end = time.time() - if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - - self.K = K - self.Kva = Kva - self.Kve = Kve - print("self.Kva is: ", pf(self.Kva)) - print("self.Kve is: ", pf(self.Kve)) - self.Y = Y - self.X0 = X0 - self.N = self.K.shape[0] - - if sum(self.Kva < 1e-6): - if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) - self.Kva[self.Kva < 1e-6] = 1e-6 - - self.transform() - - def transform(self): - - """ - Computes a transformation on the phenotype vector and the covariate matrix. - The transformation is obtained by left multiplying each parameter by the transpose of the - eigenvector matrix of K (the kinship). - """ + """ + This is a simple version of EMMA/fastLMM. + The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) + and to optimize this model by finding the maximum-likelihood estimates for the model parameters. + There are three model parameters: heritability (h), covariate coefficients (beta) and the total + phenotypic variance (sigma). + Heritability as defined here is the proportion of the total variance (sigma) that is attributed to + the kinship matrix. + + For simplicity, we assume that everything being input is a numpy array. + If this is not the case, the module may throw an error as conversion from list to numpy array + is not done consistently. + + """ + def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): + + """ + The constructor takes a phenotype vector or array of size n. + It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. + If they are not provided, the constructor will calculate them. + X0 is an optional covariate matrix of size n x q, where there are q covariates. + When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ + + if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + self.verbose = verbose + + #x = Y != -9 + x = True - np.isnan(Y) + if not x.sum() == len(Y): + if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) + Y = Y[x] + K = K[x,:][:,x] + X0 = X0[x,:] + Kva = [] + Kve = [] + self.nonmissing = x + + if len(Kva) == 0 or len(Kve) == 0: + if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + begin = time.time() + Kva,Kve = linalg.eigh(K) + end = time.time() + if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + + self.K = K + self.Kva = Kva + self.Kve = Kve + print("self.Kva is: ", pf(self.Kva)) + print("self.Kve is: ", pf(self.Kve)) + self.Y = Y + self.X0 = X0 + self.N = self.K.shape[0] + + if sum(self.Kva < 1e-6): + if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) + self.Kva[self.Kva < 1e-6] = 1e-6 + + self.transform() + + def transform(self): + + """ + Computes a transformation on the phenotype vector and the covariate matrix. + The transformation is obtained by left multiplying each parameter by the transpose of the + eigenvector matrix of K (the kinship). + """ + + self.Yt = matrixMult(self.Kve.T, self.Y) + self.X0t = matrixMult(self.Kve.T, self.X0) + self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) + self.q = self.X0t.shape[1] + + def getMLSoln(self,h,X): + + """ + Obtains the maximum-likelihood estimates for the covariate coefficients (beta), + the total variance of the trait (sigma) and also passes intermediates that can + be utilized in other functions. The input parameter h is a value between 0 and 1 and represents + the heritability or the proportion of the total variance attributed to genetics. The X is the + covariate matrix. + """ - self.Yt = matrixMult(self.Kve.T, self.Y) - self.X0t = matrixMult(self.Kve.T, self.X0) - self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) - self.q = self.X0t.shape[1] - - def getMLSoln(self,h,X): - - """ - Obtains the maximum-likelihood estimates for the covariate coefficients (beta), - the total variance of the trait (sigma) and also passes intermediates that can - be utilized in other functions. The input parameter h is a value between 0 and 1 and represents - the heritability or the proportion of the total variance attributed to genetics. The X is the - covariate matrix. - """ - - S = 1.0/(h*self.Kva + (1.0 - h)) - Xt = X.T*S - XX = matrixMult(Xt,X) - XX_i = linalg.inv(XX) - beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) - Yt = self.Yt - matrixMult(X,beta) - Q = np.dot(Yt.T*S,Yt) - sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) - return beta,sigma,Q,XX_i,XX - - def LL_brent(self,h,X=None,REML=False): - #brent will not be bounded by the specified bracket. - # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. - if h < 0: return 1e6 - return -self.LL(h,X,stack=False,REML=REML)[0] + S = 1.0/(h*self.Kva + (1.0 - h)) + Xt = X.T*S + XX = matrixMult(Xt,X) + XX_i = linalg.inv(XX) + beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) + Yt = self.Yt - matrixMult(X,beta) + Q = np.dot(Yt.T*S,Yt) + sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) + return beta,sigma,Q,XX_i,XX + + def LL_brent(self,h,X=None,REML=False): + #brent will not be bounded by the specified bracket. + # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. + if h < 0: return 1e6 + return -self.LL(h,X,stack=False,REML=REML)[0] - def LL(self,h,X=None,stack=True,REML=False): - - """ - Computes the log-likelihood for a given heritability (h). If X==None, then the - default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with - the input X. If stack is false, then X is used in place of X0t in the LL calculation. - REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. - """ - - if X == None: X = self.X0t - elif stack: - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - n = float(self.N) - q = float(X.shape[1]) - beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) - LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) - LL = -0.5 * LL - - if REML: - LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) - LL = LL + 0.5*LL_REML_part - - return LL,beta,sigma,XX_i - - def getMax(self,H, X=None,REML=False): - - """ - Helper functions for .fit(...). - This function takes a set of LLs computed over a grid and finds possible regions - containing a maximum. Within these regions, a Brent search is performed to find the - optimum. - - """ - n = len(self.LLs) - HOpt = [] - for i in range(1,n-2): - if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: - HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] - - if len(HOpt) > 1: - if self.verbose: sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") - return HOpt[0] - elif len(HOpt) == 1: return HOpt[0] - elif self.LLs[0] > self.LLs[n-1]: return H[0] - else: return H[n-1] - - def fit(self,X=None,ngrids=100,REML=True): - - """ - Finds the maximum-likelihood solution for the heritability (h) given the current parameters. - X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as - the covariate matrix. - - This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. - Given this optimum, the function computes the LL and associated ML solutions. - """ - - if X == None: X = self.X0t - else: - #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - H = np.array(range(ngrids)) / float(ngrids) - L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) - self.LLs = L - - hmax = self.getMax(H,X,REML) - L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) - - self.H = H - self.optH = hmax - self.optLL = L - self.optBeta = beta - self.optSigma = sigma - - return hmax,beta,sigma,L - - def association(self,X, h = None, stack=True,REML=True, returnBeta=False): - - """ - Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. - - """ - if stack: - #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - if h == None: h = self.optH - - L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) - q = len(beta) - ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) - - if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma - return ts,ps - - def tstat(self,beta,var,sigma,q): - + def LL(self,h,X=None,stack=True,REML=False): + + """ + Computes the log-likelihood for a given heritability (h). If X==None, then the + default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with + the input X. If stack is false, then X is used in place of X0t in the LL calculation. + REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. + """ + + if X == None: X = self.X0t + elif stack: + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + n = float(self.N) + q = float(X.shape[1]) + beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) + LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) + LL = -0.5 * LL + + if REML: + LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) + LL = LL + 0.5*LL_REML_part + + return LL,beta,sigma,XX_i + + def getMax(self,H, X=None,REML=False): + """ - Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. - This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. + Helper functions for .fit(...). + This function takes a set of LLs computed over a grid and finds possible regions + containing a maximum. Within these regions, a Brent search is performed to find the + optimum. + """ - - ts = beta / np.sqrt(var * sigma) - ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) - if not len(ts) == 1 or not len(ps) == 1: raise Exception("Something bad happened :(") - return ts.sum(),ps.sum() - - def plotFit(self,color='b-',title=''): - - """ - Simple function to visualize the likelihood space. It takes the LLs - calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. - The resulting "probabilities" are normalized to one and plotted against heritability. - This can be seen as an approximation to the posterior distribuiton of heritability. - - For diagnostic purposes this lets you see if there is one distinct maximum or multiple - and what the variance of the parameter looks like. - """ - import matplotlib.pyplot as pl - - mx = self.LLs.max() - p = np.exp(self.LLs - mx) - p = p/p.sum() - - pl.plot(self.H,p,color) - pl.xlabel("Heritability") - pl.ylabel("Probability of data") - pl.title(title) \ No newline at end of file + n = len(self.LLs) + HOpt = [] + for i in range(1,n-2): + if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: + HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + if np.isnan(HOpt[-1][0]): + HOpt[-1][0] = [self.LLs[i-1]] + + if len(HOpt) > 1: + if self.verbose: + sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") + return HOpt[0] + elif len(HOpt) == 1: + return HOpt[0] + elif self.LLs[0] > self.LLs[n-1]: + return H[0] + else: + return H[n-1] + + def fit(self,X=None,ngrids=100,REML=True): + + """ + Finds the maximum-likelihood solution for the heritability (h) given the current parameters. + X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as + the covariate matrix. + + This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. + Given this optimum, the function computes the LL and associated ML solutions. + """ + + if X == None: + X = self.X0t + else: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + H = np.array(range(ngrids)) / float(ngrids) + L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) + self.LLs = L + + hmax = self.getMax(H,X,REML) + L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) + + self.H = H + self.optH = hmax + self.optLL = L + self.optBeta = beta + self.optSigma = sigma + + return hmax,beta,sigma,L + + def association(self,X, h = None, stack=True,REML=True, returnBeta=True): + + """ + Calculates association statitics for the SNPs encoded in the vector X of size n. + If h == None, the optimal h stored in optH is used. + + """ + if stack: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + if h == None: + h = self.optH + + L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) + q = len(beta) + ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) + + if returnBeta: + return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma + return ts,ps + + def tstat(self,beta,var,sigma,q): + + """ + Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. + This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. + """ + + ts = beta / np.sqrt(var * sigma) + ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) + if not len(ts) == 1 or not len(ps) == 1: raise Exception("Something bad happened :(") + return ts.sum(),ps.sum() + + def plotFit(self,color='b-',title=''): + + """ + Simple function to visualize the likelihood space. It takes the LLs + calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. + The resulting "probabilities" are normalized to one and plotted against heritability. + This can be seen as an approximation to the posterior distribuiton of heritability. + + For diagnostic purposes this lets you see if there is one distinct maximum or multiple + and what the variance of the parameter looks like. + """ + import matplotlib.pyplot as pl + + mx = self.LLs.max() + p = np.exp(self.LLs - mx) + p = p/p.sum() + + pl.plot(self.H,p,color) + pl.xlabel("Heritability") + pl.ylabel("Probability of data") + pl.title(title) \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/pylmmGWAS.py index 487949f0..54a230de 100644 --- a/wqflask/wqflask/my_pylmm/pylmmGWAS.py +++ b/wqflask/wqflask/my_pylmm/pylmmGWAS.py @@ -20,7 +20,8 @@ import pdb import time -def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") +def printOutHead(): + out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") def outputResult(id,beta,betaSD,ts,ps): out.write("\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n") @@ -88,7 +89,8 @@ from scipy import linalg from pylmm.lmm import LMM from pylmm import input -if len(args) != 1: parser.error("Incorrect number of arguments") +if len(args) != 1: + parser.error("Incorrect number of arguments") outFile = args[0] if not options.pfile and not options.tfile and not options.bfile: @@ -97,30 +99,40 @@ if not options.kfile: parser.error("Please provide a pre-computed kinship file") # READING PLINK input -if options.verbose: sys.stderr.write("Reading PLINK input...\n") -if options.bfile: IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype) -elif options.tfile: IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype) -elif options.pfile: IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype) -else: parser.error("You must provide at least one PLINK input file base") +if options.verbose: + sys.stderr.write("Reading PLINK input...\n") +if options.bfile: + IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype) +elif options.tfile: + IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype) +elif options.pfile: + IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype) +else: + parser.error("You must provide at least one PLINK input file base") if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos'): parser.error("No .pheno file exist for %s" % (options.phenoFile or IN.fbase + '.phenos')) # READING Covariate File if options.covfile: - if options.verbose: sys.stderr.write("Reading covariate file...\n") + if options.verbose: + sys.stderr.write("Reading covariate file...\n") # Read the covariate file -- write this into input.plink P = IN.getCovariates(options.covfile) - if options.noMean: X0 = P - else: X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P]) + if options.noMean: + X0 = P + else: + X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P]) if np.isnan(X0).sum(): parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case. Either remove those individuals with missing values or replace them in some way.") -else: X0 = np.ones((IN.phenos.shape[0],1)) +else: + X0 = np.ones((IN.phenos.shape[0],1)) # READING Kinship - major bottleneck for large datasets -if options.verbose: sys.stderr.write("Reading kinship...\n") +if options.verbose: + sys.stderr.write("Reading kinship...\n") begin = time.time() # This method seems to be the fastest and works if you already know the size of the matrix if options.kfile[-3:] == '.gz': @@ -129,13 +141,15 @@ if options.kfile[-3:] == '.gz': F = f.read() # might exhaust mem if the file is huge K = np.fromstring(F,sep=' ') # Assume that space separated f.close() -else: K = np.fromfile(open(options.kfile,'r'),sep=" ") +else: + K = np.fromfile(open(options.kfile,'r'),sep=" ") K.resize((len(IN.indivs),len(IN.indivs))) end = time.time() # Other slower ways #K = np.loadtxt(options.kfile) #K = np.genfromtxt(options.kfile) -if options.verbose: sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin)) +if options.verbose: + sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin)) # PROCESS the phenotype data -- Remove missing phenotype values @@ -144,7 +158,8 @@ Y = IN.phenos[:,options.pheno] v = np.isnan(Y) keep = True - v if v.sum(): - if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum())) + if options.verbose: + sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum())) Y = Y[keep] X0 = X0[keep,:] K = K[keep,:][:,keep] diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index 8942d2ff..43c68942 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -61,29 +61,17 @@ class SearchResultPage(): self.results = [] if 'q' in kw: - self.quick_search = True + #self.quick_search = True self.search_terms = kw['q'] print("self.search_terms is: ", self.search_terms) - self.do_quick_search() + self.quick_search() else: - self.quick_search = False + #self.quick_search = False self.search_terms = kw['search_terms'] self.dataset = create_dataset(kw['dataset']) self.search() self.gen_search_result() - def gen_quick_search_result(self): - self.trait_list = [] - - species_list = [] - - for result in self.results: - if not result: - continue - if result[0] not in species_list: - species_list.append(result[0]) - - def gen_search_result(self): """ @@ -112,7 +100,7 @@ class SearchResultPage(): self.dataset.get_trait_info(self.trait_list, species) - def do_quick_search(self): + def quick_search(self): self.search_terms = parser.parse(self.search_terms) print("After parsing:", self.search_terms) @@ -171,6 +159,7 @@ class SearchResultPage(): search_ob = do_search.DoSearch.get_search(search_type) search_class = getattr(do_search, search_ob) + print("search_class is: ", pf(search_class)) the_search = search_class(search_term, search_operator, self.dataset, diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 5c064359..85e33595 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -679,61 +679,61 @@ class ShowTrait(object): elif this_trait and this_trait.dataset and this_trait.dataset.type =='Publish': #Check if trait is phenotype - if this_trait.confidential: - pass - #tbl.append(HT.TR( - # HT.TD('Pre-publication Phenotype: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.pre_publication_description, Class="fs13"), valign="top", width=740) - # )) - if webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): - #tbl.append(HT.TR( - # HT.TD('Post-publication Phenotype: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.post_publication_description, Class="fs13"), valign="top", width=740) - # )) - #tbl.append(HT.TR( - # HT.TD('Pre-publication Abbreviation: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.pre_publication_abbreviation, Class="fs13"), valign="top", width=740) - # )) - #tbl.append(HT.TR( - # HT.TD('Post-publication Abbreviation: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.post_publication_abbreviation, Class="fs13"), valign="top", width=740) - # )) - #tbl.append(HT.TR( - # HT.TD('Lab code: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.lab_code, Class="fs13"), valign="top", width=740) - # )) - pass - #tbl.append(HT.TR( - # HT.TD('Owner: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.owner, Class="fs13"), valign="top", width=740) - # )) - else: - pass - #tbl.append(HT.TR( - # HT.TD('Phenotype: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.post_publication_description, Class="fs13"), valign="top", width=740) - # )) - #tbl.append(HT.TR( - # HT.TD('Authors: ', Class="fs13 fwb", - # valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.authors, Class="fs13"), - # valign="top", width=740) - # )) - #tbl.append(HT.TR( - # HT.TD('Title: ', Class="fs13 fwb", - # valign="top", nowrap="on", width=90), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span(this_trait.title, Class="fs13"), - # valign="top", width=740) - # )) + #if this_trait.confidential: + # pass + # #tbl.append(HT.TR( + # # HT.TD('Pre-publication Phenotype: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.pre_publication_description, Class="fs13"), valign="top", width=740) + # # )) + # if webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): + # #tbl.append(HT.TR( + # # HT.TD('Post-publication Phenotype: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.post_publication_description, Class="fs13"), valign="top", width=740) + # # )) + # #tbl.append(HT.TR( + # # HT.TD('Pre-publication Abbreviation: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.pre_publication_abbreviation, Class="fs13"), valign="top", width=740) + # # )) + # #tbl.append(HT.TR( + # # HT.TD('Post-publication Abbreviation: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.post_publication_abbreviation, Class="fs13"), valign="top", width=740) + # # )) + # #tbl.append(HT.TR( + # # HT.TD('Lab code: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.lab_code, Class="fs13"), valign="top", width=740) + # # )) + # pass + # #tbl.append(HT.TR( + # # HT.TD('Owner: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.owner, Class="fs13"), valign="top", width=740) + # # )) + #else: + # pass + # #tbl.append(HT.TR( + # # HT.TD('Phenotype: ', Class="fs13 fwb", valign="top", nowrap="on", width=90), + # # HT.TD(width=10, valign="top"), + # # HT.TD(HT.Span(this_trait.post_publication_description, Class="fs13"), valign="top", width=740) + # # )) + ##tbl.append(HT.TR( + ## HT.TD('Authors: ', Class="fs13 fwb", + ## valign="top", nowrap="on", width=90), + ## HT.TD(width=10, valign="top"), + ## HT.TD(HT.Span(this_trait.authors, Class="fs13"), + ## valign="top", width=740) + ## )) + ##tbl.append(HT.TR( + ## HT.TD('Title: ', Class="fs13 fwb", + ## valign="top", nowrap="on", width=90), + ## HT.TD(width=10, valign="top"), + ## HT.TD(HT.Span(this_trait.title, Class="fs13"), + ## valign="top", width=740) + ## )) if this_trait.journal: journal = this_trait.journal if this_trait.year: -- cgit v1.2.3 From bf78deec93a6ef3296b4c8cf38a71d1a03480d21 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 2 Apr 2013 22:07:01 +0000 Subject: Committing before splitting code that runs pylmm with plink files and code that runs it with json --- wqflask/base/data_set.py | 80 ++++++++++++++++------ .../wqflask/marker_regression/marker_regression.py | 41 +++-------- .../static/new/javascript/marker_regression.coffee | 1 + .../static/new/javascript/marker_regression.js | 1 + wqflask/wqflask/views.py | 2 +- 5 files changed, 71 insertions(+), 54 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 17881e53..ab8554a0 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -35,6 +35,7 @@ from base import webqtlConfig from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil +from utility.benchmark import Bench from MySQLdb import escape_string as escape from pprint import pformat as pf @@ -73,14 +74,60 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): + print("length of self.markers:", len(self.markers)) + print("length of p_values:", len(p_values)) + + # THIS IS only needed for the case when we are limiting the number of p-values calculated + if len(self.markers) > len(p_values): + self.markers = self.markers[:len(p_values)] + for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value print("p_value is:", marker['p_value']) marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + + + +class HumanMarkers(Markers): + + def __init__(self, name): + marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim')) + self.markers = [] + for line in marker_data_fh: + splat = line.strip().split() + marker = {} + marker['chr'] = int(splat[0]) + marker['name'] = splat[1] + marker['Mb'] = float(splat[3]) / 1000000 + self.markers.append(marker) + + #print("markers is: ", pf(self.markers)) + def add_pvalues(self, p_values): + #for marker, p_value in itertools.izip(self.markers, p_values): + # if marker['Mb'] <= 0 and marker['chr'] == 0: + # continue + # marker['p_value'] = p_value + # print("p_value is:", marker['p_value']) + # marker['lod_score'] = -math.log10(marker['p_value']) + # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + super(HumanMarkers, self).add_pvalues(p_values) + + with Bench("deleting markers"): + markers = [] + for marker in self.markers: + if not marker['Mb'] <= 0 and not marker['chr'] == 0: + markers.append(marker) + self.markers = markers + + + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. @@ -104,21 +151,17 @@ class DatasetGroup(object): self.incparentsf1 = False self.allsamples = None - self.markers = Markers(self.name) - - - #def read_genotype(self): - # self.read_genotype_file() - # - # if not self.genotype: # Didn'd succeed, so we try method 2 - # self.read_genotype_data() + + + def get_markers(self): + print("self.species is:", self.species) + if self.species == "human": + marker_class = HumanMarkers + else: + marker_class = Markers - #def read_genotype_json(self): - # '''Read genotype from json file''' - # - # json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json')) - # markers = json.load(json_data) - # + self.markers = marker_class(self.name) + def get_f1_parent_strains(self): try: @@ -321,12 +364,9 @@ class PhenotypeDataSet(DataSet): continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description - this_trait.description_display = description - - try: - this_trait.description_display.decode('ascii') - except Exception: - this_trait.description_display = this_trait.description_display.decode('utf-8') + this_trait.description_display = description.decode('utf-8') + + if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c3555e8f..a640d37f 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -57,7 +57,6 @@ class MarkerRegression(object): chromosomes = chromosome_mb_lengths, qtl_results = self.qtl_results, ) - def gen_data(self, tempdata): @@ -67,8 +66,8 @@ class MarkerRegression(object): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) plink_input = input.plink(file_base, type='b') - - + + pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) covariate_matrix = np.ones((pheno_vector.shape[0],1)) @@ -83,9 +82,6 @@ class MarkerRegression(object): eigen_values = [] eigen_vectors = [] - - print("pheno_vector shape is: ", pf(pheno_vector.shape)) - #print("pheno_vector is: ", pf(pheno_vector)) #print("kinship_matrix is: ", pf(kinship_matrix)) @@ -101,9 +97,6 @@ class MarkerRegression(object): # eigen_values = np.fromfile(file_base + ".kin.kva") # eigen_vectors = np.fromfile(file_base + ".kin.kve") - #print("eigen_values is: ", pf(eigen_values)) - #print("eigen_vectors is: ", pf(eigen_vectors)) - n = kinship_matrix.shape[0] lmm_ob = lmm.LMM(pheno_vector, kinship_matrix, @@ -121,8 +114,8 @@ class MarkerRegression(object): print("# snps is: ", pf(plink_input.numSNPs)) with Bench("snp iterator loop"): for snp, this_id in plink_input: - #if count > 10000: - # break + if count > 1000: + break count += 1 x = snp[keep].reshape((n,1)) @@ -138,13 +131,13 @@ class MarkerRegression(object): p_values.append(np.nan) t_statistics.append(np.nan) continue - + # Its ok to center the genotype - I used options.normalizeGenotype to # force the removal of missing genotypes as opposed to replacing them with MAF. - + #if not options.normalizeGenotype: # xs = (xs - xs.mean()) / np.sqrt(xs.var()) - + filtered_pheno = pheno_vector[keeps] filtered_covariate_matrix = covariate_matrix[keeps,:] filtered_kinship_matrix = kinship_matrix[keeps,:][:,keeps] @@ -167,7 +160,6 @@ class MarkerRegression(object): ts,ps,beta,betaVar = lmm_ob.association(x) p_values.append(ps) t_statistics.append(ts) - #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] # @@ -187,28 +179,11 @@ class MarkerRegression(object): # temp_data=tempdata #) - print("p_values is: ", pf(p_values)) + self.dataset.group.get_markers() self.dataset.group.markers.add_pvalues(p_values) - #self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] - #lrs_values_sorted = sorted(self.lrs_values) - # - #lrs_values_length = len(lrs_values_sorted) - # - #def lrs_threshold(place): - # return lrs_values_sorted[int((lrs_values_length * place) -1)] - # - #self.lrs_thresholds = Bunch( - # suggestive = lrs_threshold(.37), - # significant = lrs_threshold(.95), - # highly_significant = lrs_threshold(.99), - # ) - self.qtl_results = self.dataset.group.markers.markers - for marker in self.qtl_results: - if marker['lrs_value'] > webqtlConfig.MAXLRS: - marker['lrs_value'] = webqtlConfig.MAXLRS def identify_empty_samples(self): no_val_samples = [] diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.coffee b/wqflask/wqflask/static/new/javascript/marker_regression.coffee index 6e605fa7..3e14ab6b 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.coffee +++ b/wqflask/wqflask/static/new/javascript/marker_regression.coffee @@ -2,6 +2,7 @@ $ -> class Manhattan_Plot constructor: (@plot_height, @plot_width) -> @qtl_results = js_data.qtl_results + console.log("qtl_results are:", @qtl_results) @chromosomes = js_data.chromosomes @total_length = 0 diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.js b/wqflask/wqflask/static/new/javascript/marker_regression.js index cb3c09cb..09470daf 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.js +++ b/wqflask/wqflask/static/new/javascript/marker_regression.js @@ -11,6 +11,7 @@ this.plot_height = plot_height; this.plot_width = plot_width; this.qtl_results = js_data.qtl_results; + console.log("qtl_results are:", this.qtl_results); this.chromosomes = js_data.chromosomes; this.total_length = 0; this.max_chr = this.get_max_chr(); diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 46433430..7f5f88e0 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -168,7 +168,7 @@ def marker_regression_page(): if key in wanted or key.startswith(('value:')): start_vars[key] = value - version = "v5" + version = "v13" key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True) with Bench("Loading cache"): result = Redis.get(key) -- cgit v1.2.3 From cde27eb1f638eb4b769ba37202bb82b552ace44c Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 5 Apr 2013 19:35:28 +0000 Subject: Made changes to get pylmm code working with HMDP datasets --- wqflask/base/data_set.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index ab8554a0..16bc4ba4 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -74,16 +74,12 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): - print("length of self.markers:", len(self.markers)) - print("length of p_values:", len(p_values)) - # THIS IS only needed for the case when we are limiting the number of p-values calculated if len(self.markers) > len(p_values): self.markers = self.markers[:len(p_values)] for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value - print("p_value is:", marker['p_value']) marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 @@ -99,9 +95,9 @@ class HumanMarkers(Markers): for line in marker_data_fh: splat = line.strip().split() marker = {} - marker['chr'] = int(splat[0]) + marker['chr'] = splat[0] marker['name'] = splat[1] - marker['Mb'] = float(splat[3]) / 1000000 + marker['Mb'] = str(float(splat[3]) / 1000000) self.markers.append(marker) #print("markers is: ", pf(self.markers)) @@ -122,7 +118,8 @@ class HumanMarkers(Markers): with Bench("deleting markers"): markers = [] for marker in self.markers: - if not marker['Mb'] <= 0 and not marker['chr'] == 0: + #if not float(marker['Mb']) <= 0 or not float(marker['chr']) == 0: + if float(marker['Mb']) > 0 and marker['chr'] != "0": markers.append(marker) self.markers = markers -- cgit v1.2.3 From c7c306c69254ca49ddeccc495a8a096fcf03974d Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 9 Apr 2013 19:36:43 +0000 Subject: Nick's code works fine with human data Added option to limit results based on lod score (which also changes the y-axis of the plot) --- wqflask/base/data_set.py | 2 -- wqflask/base/species.py | 10 +++++----- .../wqflask/marker_regression/marker_regression.py | 13 ++++++++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++-- .../static/new/javascript/marker_regression.coffee | 16 +++++++++------- .../static/new/javascript/marker_regression.js | 12 ++++++------ .../new/javascript/show_trait_mapping_tools.coffee | 12 +++++++++++- .../new/javascript/show_trait_mapping_tools.js | 11 ++++++++++- .../templates/show_trait_mapping_tools.html | 22 ++++++++++------------ wqflask/wqflask/views.py | 1 + 10 files changed, 62 insertions(+), 41 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 16bc4ba4..111597a9 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -83,8 +83,6 @@ class Markers(object): marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 - - class HumanMarkers(Markers): diff --git a/wqflask/base/species.py b/wqflask/base/species.py index 689e5c05..191f4535 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -16,8 +16,7 @@ class TheSpecies(object): print("self.dataset is:", pf(self.dataset.__dict__)) self.chromosomes = Chromosomes(self.dataset) self.genome_mb_length = self.chromosomes.get_genome_mb_length() - - + #@property #def chromosomes(self): # chromosomes = [("All", -1)] @@ -31,7 +30,8 @@ class TheSpecies(object): # return chromosomes class IndChromosome(object): - def __init__(self, length): + def __init__(self, name, length): + self.name = name self.length = length @property @@ -50,7 +50,7 @@ class Chromosomes(object): results = g.db.execute(""" Select - Chr_Length.Name, Length from Chr_Length, InbredSet + Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet where Chr_Length.SpeciesId = InbredSet.SpeciesId AND InbredSet.Name = %s @@ -59,7 +59,7 @@ class Chromosomes(object): print("bike:", results) for item in results: - self.chromosomes[item.Name] = IndChromosome(item.Length) + self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) self.set_mb_graph_interval() #self.get_cm_length_list() diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 545c8162..86d9fe06 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -41,6 +41,8 @@ class MarkerRegression(object): self.samples = [] # Want only ones with values self.vals = [] + print("start_vars: ", pf(start_vars)) + self.suggestive = float(start_vars['suggestive']) for sample in self.dataset.group.samplelist: value = start_vars['value:' + sample] @@ -50,12 +52,13 @@ class MarkerRegression(object): self.gen_data(tempdata) #Get chromosome lengths for drawing the manhattan plot - chromosome_mb_lengths = {} + chromosomes = {} for key in self.species.chromosomes.chromosomes.keys(): - chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length + this_chr = self.species.chromosomes.chromosomes[key] + chromosomes[key] = [this_chr.name, this_chr.mb_length] self.js_data = dict( - chromosomes = chromosome_mb_lengths, + chromosomes = chromosomes, qtl_results = self.qtl_results, ) @@ -87,12 +90,12 @@ class MarkerRegression(object): refit=False, temp_data=tempdata ) - + self.dataset.group.markers.add_pvalues(p_values) self.qtl_results = [] for marker in self.dataset.group.markers.markers: - if marker['p_value'] < 0.2: + if marker['lod_score'] >= self.suggestive: self.qtl_results.append(marker) #self.qtl_results = self.dataset.group.markers.markers diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 0e965c8e..e5978933 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -66,8 +66,8 @@ def run_human(pheno_vector, with Bench("snp iterator loop"): count = 0 for snp, this_id in plink_input: - #if count > 5000: - # break + if count > 5000: + break count += 1 diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.coffee b/wqflask/wqflask/static/new/javascript/marker_regression.coffee index 78b6fdbc..fd1fac54 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.coffee +++ b/wqflask/wqflask/static/new/javascript/marker_regression.coffee @@ -25,6 +25,7 @@ $ -> #@x_max = d3.max(@x_coords) @x_max = @total_length + @y_min = d3.min(@y_coords) @y_max = d3.max(@y_coords) * 1.2 @svg = @create_svg() @@ -57,7 +58,7 @@ $ -> chr_lengths = [] total_length = 0 for key of @chromosomes - this_length = @chromosomes[key] + this_length = @chromosomes[key][1] chr_lengths.push(this_length) cumulative_chr_lengths.push(total_length + this_length) total_length += this_length @@ -70,10 +71,11 @@ $ -> chr_lengths = [] chr_seen = [] for result in js_data.qtl_results - chr_length = @chromosomes[result.chr] + chr_length = @chromosomes[result.chr][1] if not(result.chr in chr_seen) chr_seen.push(result.chr) - chr_lengths.push(chr_length) + chr_lengths.push(chr_length) + if result.chr != "1" @total_length += chr_lengths[chr_lengths.length - 2] @x_coords.push(@total_length + parseFloat(result.Mb)) @@ -104,7 +106,7 @@ $ -> @add_x_axis() @add_y_axis() @add_chr_lines() - @fill_chr_areas() + #@fill_chr_areas() @add_chr_labels() @add_plot_points() @@ -138,7 +140,7 @@ $ -> .range([@x_buffer, @plot_width]) @y_scale = d3.scale.linear() - .domain([0, @y_max]) + .domain([@y_min, @y_max]) .range([@plot_height, @y_buffer]) create_x_axis_tick_values: () -> @@ -244,7 +246,7 @@ $ -> add_chr_labels: () -> chr_names = [] for key of @chromosomes - chr_names.push(key) + chr_names.push(@chromosomes[key][0]) chr_info = _.zip(chr_names, @chr_lengths, @cumulative_chr_lengths) @svg.selectAll("text") .data(chr_info, (d) => @@ -274,7 +276,7 @@ $ -> return @x_buffer + ((@plot_width-@x_buffer) * d[0]/@x_max) ) .attr("cy", (d) => - return @plot_height - ((@plot_height-@y_buffer) * d[1]/@y_max) + return @plot_height - ((@plot_height-@y_buffer) * (d[1]-@y_min)/@y_max) ) .attr("r", 2) .attr("id", (d) => diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.js b/wqflask/wqflask/static/new/javascript/marker_regression.js index 25d88ec0..37a4e1e5 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.js +++ b/wqflask/wqflask/static/new/javascript/marker_regression.js @@ -27,6 +27,7 @@ this.x_buffer = this.plot_width / 30; this.y_buffer = this.plot_height / 20; this.x_max = this.total_length; + this.y_min = d3.min(this.y_coords); this.y_max = d3.max(this.y_coords) * 1.2; this.svg = this.create_svg(); this.plot_coordinates = _.zip(this.x_coords, this.y_coords, this.marker_names); @@ -65,7 +66,7 @@ chr_lengths = []; total_length = 0; for (key in this.chromosomes) { - this_length = this.chromosomes[key]; + this_length = this.chromosomes[key][1]; chr_lengths.push(this_length); cumulative_chr_lengths.push(total_length + this_length); total_length += this_length; @@ -80,7 +81,7 @@ _ref = js_data.qtl_results; for (_i = 0, _len = _ref.length; _i < _len; _i++) { result = _ref[_i]; - chr_length = this.chromosomes[result.chr]; + chr_length = this.chromosomes[result.chr][1]; if (!(_ref1 = result.chr, __indexOf.call(chr_seen, _ref1) >= 0)) { chr_seen.push(result.chr); chr_lengths.push(chr_length); @@ -120,7 +121,6 @@ this.add_x_axis(); this.add_y_axis(); this.add_chr_lines(); - this.fill_chr_areas(); this.add_chr_labels(); return this.add_plot_points(); }; @@ -142,7 +142,7 @@ Manhattan_Plot.prototype.create_scales = function() { this.x_scale = d3.scale.linear().domain([0, d3.max(this.x_coords)]).range([this.x_buffer, this.plot_width]); - return this.y_scale = d3.scale.linear().domain([0, this.y_max]).range([this.plot_height, this.y_buffer]); + return this.y_scale = d3.scale.linear().domain([this.y_min, this.y_max]).range([this.plot_height, this.y_buffer]); }; Manhattan_Plot.prototype.create_x_axis_tick_values = function() { @@ -233,7 +233,7 @@ _this = this; chr_names = []; for (key in this.chromosomes) { - chr_names.push(key); + chr_names.push(this.chromosomes[key][0]); } chr_info = _.zip(chr_names, this.chr_lengths, this.cumulative_chr_lengths); return this.svg.selectAll("text").data(chr_info, function(d) { @@ -250,7 +250,7 @@ return this.svg.selectAll("circle").data(this.plot_coordinates).enter().append("circle").attr("cx", function(d) { return _this.x_buffer + ((_this.plot_width - _this.x_buffer) * d[0] / _this.x_max); }).attr("cy", function(d) { - return _this.plot_height - ((_this.plot_height - _this.y_buffer) * d[1] / _this.y_max); + return _this.plot_height - ((_this.plot_height - _this.y_buffer) * (d[1] - _this.y_min) / _this.y_max); }).attr("r", 2).attr("id", function(d) { return "point_" + String(d[2]); }).classed("circle", true).on("mouseover", function(d) { diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee index bc176ab9..03f872ca 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.coffee @@ -60,7 +60,17 @@ $ -> ) return false - $("#marker_regression").click(() => + $('#suggestive').hide() + + $('input[name=display_all]').change(() => + console.log("check") + if $('input[name=display_all]:checked').val() == "False" + $('#suggestive').show() + else + $('#suggestive').hide() + ) + + $("#marker_regression_compute").click(() => $("#progress_bar_container").modal() url = "/marker_regression" form_data = $('#trait_data_form').serialize() diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index b017257a..329dcdd9 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -62,7 +62,16 @@ }); return false; }; - $("#marker_regression").click(function() { + $('#suggestive').hide(); + $('input[name=display_all]').change(function() { + console.log("check"); + if ($('input[name=display_all]:checked').val() === "False") { + return $('#suggestive').show(); + } else { + return $('#suggestive').hide(); + } + }); + $("#marker_regression_compute").click(function() { var form_data, url; $("#progress_bar_container").modal(); url = "/marker_regression"; diff --git a/wqflask/wqflask/templates/show_trait_mapping_tools.html b/wqflask/wqflask/templates/show_trait_mapping_tools.html index a98a75c7..c2d5211f 100644 --- a/wqflask/wqflask/templates/show_trait_mapping_tools.html +++ b/wqflask/wqflask/templates/show_trait_mapping_tools.html @@ -95,26 +95,24 @@
-
- +
+
-
-
- +
+
+
- +
@@ -127,7 +125,7 @@
-
{% include 'show_trait_details.html' %} - {% include 'show_trait_statistics.html' %} + {# {% include 'show_trait_statistics.html' %} #} {% include 'show_trait_calculate_correlations.html' %} {% include 'show_trait_mapping_tools.html' %} {% include 'show_trait_edit_data.html' %} diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 543afadd..12a064c0 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -1,130 +1,119 @@ -

  Calculate Correlations

+
+

Calculate Correlations

+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+ +
+ +
+
+ +
+
+ +
+
-

+ + The Sample Correlation + is computed + between trait data and any
+ other traits in the sample database selected above. Use + Spearman + Rank
+ when the sample size is small (<20) or when there are influential outliers. +
+ + - - - - -
-
-
- - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
Method: - -
Database: - -
Return:
Samples: - -
-
-
- Pearson -     - Spearman Rank -
-
- -

- - - The Sample Correlation - is computed - between trait data and any
- other traits in the sample database selected above. Use - Spearman - Rank
- when the sample size is small (<20) or when there are influential outliers. -
- - - -
-
-
-
-
+
+
\ No newline at end of file diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 7a504c54..8531561a 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -210,7 +210,7 @@ def marker_regression_page(): @app.route("/corr_compute", methods=('POST',)) def corr_compute_page(): - print("In corr_compute, request.args is:", pf(request.form)) + print("In corr_compute, request.form is:", pf(request.form)) #fd = webqtlFormData.webqtlFormData(request.form) template_vars = show_corr_results.CorrelationResults(request.form) return render_template("correlation_page.html", **template_vars.__dict__) -- cgit v1.2.3 From 5a3f413da480123e3ad943b5f556e0a557f185cc Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 9 May 2013 22:54:34 +0000 Subject: Just added some print statements so I can show matrix/vector shapes to Tony --- wqflask/base/data_set.py | 2 +- .../wqflask/marker_regression/marker_regression.py | 5 +++-- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 24 +++++++++++++++------- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 1520b180..d7328441 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -385,7 +385,7 @@ class PhenotypeDataSet(DataSet): continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description - this_trait.description_display = description + this_trait.description_display = unicode(description, "utf8") if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 6ae1318e..334ce631 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -78,8 +78,9 @@ class MarkerRegression(object): genotype_matrix = np.array(trimmed_genotype_data).T - print("pheno_vector is: ", pf(pheno_vector)) - print("genotype_matrix is: ", pf(genotype_matrix)) + print("pheno_vector: ", pf(pheno_vector)) + print("genotype_matrix: ", pf(genotype_matrix)) + print("genotype_matrix.shape: ", pf(genotype_matrix.shape)) t_stats, p_values = lmm.run( pheno_vector, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index fc021a0b..62fb0fbd 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -58,6 +58,10 @@ def run_human(pheno_vector, identifier = str(uuid.uuid4()) + print("pheno_vector: ", pf(pheno_vector)) + print("kinship_matrix: ", pf(kinship_matrix)) + print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) + lmm_vars = pickle.dumps(dict( pheno_vector = pheno_vector, covariate_matrix = covariate_matrix, @@ -70,12 +74,12 @@ def run_human(pheno_vector, pheno_vector = pheno_vector[keep] #print("pheno_vector shape is now: ", pf(pheno_vector.shape)) covariate_matrix = covariate_matrix[keep,:] - print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) + #print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) #print("len(keep) is: ", pf(keep.shape)) kinship_matrix = kinship_matrix[keep,:][:,keep] n = kinship_matrix.shape[0] - print("n is:", n) + #print("n is:", n) lmm_ob = LMM(pheno_vector, kinship_matrix, covariate_matrix) @@ -86,7 +90,7 @@ def run_human(pheno_vector, p_values = [] t_stats = [] - print("input_file: ", plink_input_file) + #print("input_file: ", plink_input_file) with Bench("Opening and loading pickle file"): with gzip.open(plink_input_file, "rb") as input_file: @@ -103,6 +107,8 @@ def run_human(pheno_vector, with Bench("Create list of inputs"): inputs = list(plink_input) + + print("len(genotypes): ", len(inputs)) with Bench("Divide into chunks"): results = chunks.divide_into_chunks(inputs, 64) @@ -116,7 +122,7 @@ def run_human(pheno_vector, timestamp = datetime.datetime.utcnow().isoformat() - print("Starting adding loop") + #print("Starting adding loop") for part, result in enumerate(results): #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) holder = pickle.dumps(dict( @@ -126,10 +132,10 @@ def run_human(pheno_vector, result = result ), pickle.HIGHEST_PROTOCOL) - print("Adding:", part) + #print("Adding:", part) Redis.rpush(key, zlib.compress(holder)) - print("End adding loop") - print("***** Added to {} queue *****".format(key)) + #print("End adding loop") + #print("***** Added to {} queue *****".format(key)) for snp, this_id in plink_input: #with Bench("part before association"): if count > 2000: @@ -157,6 +163,10 @@ def run_human(pheno_vector, return p_values, t_stats +#class HumanAssociation(object): +# def __init__(self): +# + def human_association(snp, n, keep, -- cgit v1.2.3 From e31d163325d0d417bf266d1c3d9e52b6ff00f83b Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 23 May 2013 20:53:11 +0000 Subject: Now calculates correlation values for traits, but not yet in template --- wqflask/base/data_set.py | 46 +++++++++++++------- wqflask/wqflask/correlation/show_corr_results.py | 55 ++++++++++++++++-------- 2 files changed, 67 insertions(+), 34 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 1520b180..89bbf03d 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -46,7 +46,7 @@ from pprint import pformat as pf DS_NAME_MAP = {} def create_dataset(dataset_name): - print("dataset_name:", dataset_name) + #print("dataset_name:", dataset_name) query = """ SELECT DBType.Name @@ -71,7 +71,7 @@ def create_dataset(dataset_name): def mescape(*items): """Multiple escape""" escaped = [escape(item) for item in items] - print("escaped is:", escaped) + #print("escaped is:", escaped) return escaped @@ -235,6 +235,7 @@ class DataSet(object): self.retrieve_other_names() self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype + self.group.read_genotype_file() self.species = species.TheSpecies(self) @@ -624,17 +625,34 @@ class MrnaAssayDataSet(DataSet): return trait_data def get_trait_data(self): + import pdb + pdb.set_trace() + #samplelist = [] + #samplelist += self.group.samplelist + #samplelist += self.group.parlist + #samplelist += self.group.f1list + #self.samplelist = samplelist + + self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list + sample_ids = [] - for sample in self.group.samplelist: - query = """ - SELECT Strain.Id FROM Strain, Species - WHERE Strain.Name = '{}' - and Strain.SpeciesId=Species.Id - and Species.name = '{}' - """.format(*mescape(sample, self.group.species)) - this_id = g.db.execute(query).fetchone()[0] - sample_ids.append('%d' % this_id) - print("sample_ids size: ", len(sample_ids)) + + where_clause = "" + for sample in self.samplelist: + if len(where_clause): + where_clause += " or " + where_clause += """'{}'""".format(*mescape(sample)) + + query = """ + SELECT Strain.Id, Strain.Name FROM Strain, Species + WHERE Strain.Name = '{}' + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(*mescape(where_clause, self.group.species)) + result = g.db.execute(query).fetchall() + + print("[blueberry] result is:", pf(result)) + #sample_ids.append('%d' % this_id) # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks @@ -642,7 +660,6 @@ class MrnaAssayDataSet(DataSet): n = len(sample_ids) / chunk_count if len(sample_ids) % chunk_count: n += 1 - print("n: ", n) #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId #tempTable = None #if GeneId and db.type == "ProbeSet": @@ -681,10 +698,9 @@ class MrnaAssayDataSet(DataSet): order by {}.Id """.format(*mescape(self.type, self.type, self.type, self.type, self.name, self.type, self.type, self.type, self.type)) - print("query: ", query) results = g.db.execute(query).fetchall() trait_sample_data.append(results) - + trait_count = len(trait_sample_data[0]) self.trait_data = collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 1d0368cc..ee732050 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -37,6 +37,7 @@ import time #import pyXLWriter as xl import pp import math +import collections from pprint import pformat as pf @@ -285,16 +286,15 @@ class CorrelationResults(object): # name=start_vars['trait_id'], # cellid=None) - print("start_vars: ", pf(start_vars)) + #print("start_vars: ", pf(start_vars)) helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() - - self.samples = [] # Want only ones with values - self.vals = [] corr_samples_group = start_vars['corr_samples_group'] + self.sample_data = {} + #The two if statements below append samples to the sample list based upon whether the user #selected Primary Samples Only, Other Samples Only, or All Samples @@ -310,16 +310,24 @@ class CorrelationResults(object): self.dataset.group.f1list + self.dataset.group.samplelist) self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) - - #for i, sample in enumerate(self.samples): - # print("{} : {}".format(sample, self.vals[i])) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() - print("trait_list: {}".format(pf(self.target_dataset.trait_data))) # Lei Yan todo + import pdb + pdb.set_trace() + correlation_data = collections.defaultdict(list) for trait, values in self.target_dataset.trait_data.iteritems(): - correlation = calCorrelation(values, ) + values_1 = [] + values_2 = [] + for index,sample in enumerate(self.target_dataset.samplelist): + target_value = values[index] + if sample in self.sample_data.keys(): + this_value = self.sample_data[sample] + values_1.append(this_value) + values_2.append(target_value) + correlation = calCorrelation(values_1, values_2) + correlation_data[trait] = correlation + print ('%s %s' % (trait, correlation)) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset @@ -779,19 +787,28 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); """ + #def process_samples(self, start_vars, sample_names, excluded_samples): + # for sample in sample_names: + # if sample not in excluded_samples: + # value = start_vars['value:' + sample] + # variance = start_vars['variance:' + sample] + # if variance.strip().lower() == 'x': + # variance = 0 + # else: + # variance = float(variance) + # if value.strip().lower() != 'x': + # self.samples.append(str(sample)) + # self.vals.append(float(value)) + # #self.variances.append(variance) + def process_samples(self, start_vars, sample_names, excluded_samples): for sample in sample_names: if sample not in excluded_samples: value = start_vars['value:' + sample] - variance = start_vars['variance:' + sample] - if variance.strip().lower() == 'x': - variance = 0 + if value.strip().lower() == 'x': + self.sample_data[str(sample)] = None else: - variance = float(variance) - if value.strip().lower() != 'x': - self.samples.append(str(sample)) - self.vals.append(float(value)) - #self.variances.append(variance) + self.sample_data[str(sample)] = float(value) def getSortByValue(self, calculationMethod): @@ -2134,7 +2151,7 @@ Resorting this table
def calCorrelation(values_1, values_2): - N = Math.min(len(values_1), len(values_2)) + N = min(len(values_1), len(values_2)) X = [] Y = [] for i in range(N): -- cgit v1.2.3 From 953b41486b035fbe786c7d2675f7b6cf898c12da Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 23 May 2013 21:19:00 +0000 Subject: Changed the way the query that gets sample ids is generated --- wqflask/base/data_set.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 89bbf03d..b2836480 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -625,33 +625,24 @@ class MrnaAssayDataSet(DataSet): return trait_data def get_trait_data(self): - import pdb - pdb.set_trace() - #samplelist = [] - #samplelist += self.group.samplelist - #samplelist += self.group.parlist - #samplelist += self.group.f1list - #self.samplelist = samplelist - self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list - sample_ids = [] - where_clause = "" - for sample in self.samplelist: - if len(where_clause): - where_clause += " or " - where_clause += """'{}'""".format(*mescape(sample)) + #for sample in self.samplelist: + # if len(where_clause): + # where_clause += " or " + # where_clause += "Strain.Name = '{}'".format(*mescape(sample)) query = """ SELECT Strain.Id, Strain.Name FROM Strain, Species - WHERE Strain.Name = '{}' + WHERE ({}) and Strain.SpeciesId=Species.Id and Species.name = '{}' - """.format(*mescape(where_clause, self.group.species)) + """.format(where_clause, *mescape(self.group.species)) + print("raspberry query: ", query) result = g.db.execute(query).fetchall() - print("[blueberry] result is:", pf(result)) + print("[blackberry] result is:", pf(result)) #sample_ids.append('%d' % this_id) # MySQL limits the number of tables that can be used in a join to 61, -- cgit v1.2.3 From 8d0c6166a297d2cc89394649b8f56d8c6bf5d0f7 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 23 May 2013 23:01:54 +0000 Subject: Worked on rewriting the function in data_set.py that gets the sample values for each trait --- wqflask/base/data_set.py | 72 ++++++++++++++++-------- wqflask/wqflask/correlation/show_corr_results.py | 5 +- 2 files changed, 49 insertions(+), 28 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index b2836480..edee6685 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -38,6 +38,7 @@ from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench +from wqflask.my_pylmm.pyLMM import chunks from MySQLdb import escape_string as escape from pprint import pformat as pf @@ -68,6 +69,13 @@ def create_dataset(dataset_name): dataset_class = globals()[dataset_ob] return dataset_class(dataset_name) +def create_in_clause(items): + """Create an in clause for mysql""" + in_clause = ', '.join("'{}'".format(x) for x in mescape(*items)) + in_clause = '( {} )'.format(in_clause) + return in_clause + + def mescape(*items): """Multiple escape""" escaped = [escape(item) for item in items] @@ -626,31 +634,45 @@ class MrnaAssayDataSet(DataSet): def get_trait_data(self): self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list - sample_ids = [] - where_clause = "" - #for sample in self.samplelist: - # if len(where_clause): - # where_clause += " or " - # where_clause += "Strain.Name = '{}'".format(*mescape(sample)) - + #query_samplelist = ', '.join("'{}'".format(x) for x in mescape(*samplelist)) + #query_samplelist = '( ' + query_samplelist + ' )' + #query_samplelist = create_in(samplelist) + + print("self.samplelist is:", self.samplelist) + query = """ - SELECT Strain.Id, Strain.Name FROM Strain, Species - WHERE ({}) + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} and Strain.SpeciesId=Species.Id and Species.name = '{}' - """.format(where_clause, *mescape(self.group.species)) - print("raspberry query: ", query) - result = g.db.execute(query).fetchall() + """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) + results = dict(g.db.execute(query).fetchall()) + print("results are:", results) + print("type results are:", type(results)) + + #sample_ids = [] + #for item in self.samplelist: + # sample_ids.append(results[item]) + + sample_ids = [results[item] for item in self.samplelist] + print("sample_ids are:", sample_ids) + + #for sample in self.samplelist: + # pass - print("[blackberry] result is:", pf(result)) - #sample_ids.append('%d' % this_id) + #for index in range(len(results)): + # sample_ids.append(results[index][0]) # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks - chunk_count = 50 - n = len(sample_ids) / chunk_count - if len(sample_ids) % chunk_count: - n += 1 + # Postgres doesn't have that limit, so we can get rid of this after we transition + chunk_size = 50 + + number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) + + trait_sample_data = [] + for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): + #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId #tempTable = None #if GeneId and db.type == "ProbeSet": @@ -664,12 +686,14 @@ class MrnaAssayDataSet(DataSet): # TissueProbeSetFreezeId=tissueProbeSetFreezeId, # method=method, # returnNumber=returnNumber) - trait_sample_data = [] - for step in range(int(n)): - temp = [] - sample_ids_step = sample_ids[step*chunk_count:min(len(sample_ids), (step+1)*chunk_count)] - for item in sample_ids_step: - temp.append('T%s.value' % item) + + #for step in range(int(n)): + #temp = [] + #sample_ids_step = sample_ids[step*chunk_size:min(len(sample_ids), (step+1)*chunk_size)] + #for item in sample_ids_step: + # temp.append('T%s.value' % item) + + temp = ['T%s.value' % item for item in sample_ids_step] query = "SELECT {}.Name,".format(escape(self.type)) data_start_pos = 1 query += string.join(temp, ', ') diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index ee732050..9b1843bd 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -312,9 +312,6 @@ class CorrelationResults(object): self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() - # Lei Yan todo - import pdb - pdb.set_trace() correlation_data = collections.defaultdict(list) for trait, values in self.target_dataset.trait_data.iteritems(): values_1 = [] @@ -327,7 +324,7 @@ class CorrelationResults(object): values_2.append(target_value) correlation = calCorrelation(values_1, values_2) correlation_data[trait] = correlation - print ('%s %s' % (trait, correlation)) + print ('correlation result: %s %s' % (trait, correlation)) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset -- cgit v1.2.3 From cb639316fe007c8bcad731976e8b095dee59115e Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Tue, 28 May 2013 23:10:22 +0000 Subject: Have correlation values appearing in a table in the template Use scipy to calculate pearson correlation instead of old GN code --- wqflask/base/data_set.py | 34 +- wqflask/wqflask/correlation/show_corr_results.py | 1320 ++------------------ wqflask/wqflask/templates/correlation_page.html | 1411 +--------------------- 3 files changed, 152 insertions(+), 2613 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index edee6685..c2380f8c 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -78,7 +78,7 @@ def create_in_clause(items): def mescape(*items): """Multiple escape""" - escaped = [escape(item) for item in items] + escaped = [escape(str(item)) for item in items] #print("escaped is:", escaped) return escaped @@ -634,12 +634,6 @@ class MrnaAssayDataSet(DataSet): def get_trait_data(self): self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list - #query_samplelist = ', '.join("'{}'".format(x) for x in mescape(*samplelist)) - #query_samplelist = '( ' + query_samplelist + ' )' - #query_samplelist = create_in(samplelist) - - print("self.samplelist is:", self.samplelist) - query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} @@ -647,29 +641,13 @@ class MrnaAssayDataSet(DataSet): and Species.name = '{}' """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) results = dict(g.db.execute(query).fetchall()) - print("results are:", results) - print("type results are:", type(results)) - - #sample_ids = [] - #for item in self.samplelist: - # sample_ids.append(results[item]) - sample_ids = [results[item] for item in self.samplelist] - print("sample_ids are:", sample_ids) - - #for sample in self.samplelist: - # pass - - #for index in range(len(results)): - # sample_ids.append(results[index][0]) # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks # Postgres doesn't have that limit, so we can get rid of this after we transition chunk_size = 50 - number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) - trait_sample_data = [] for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): @@ -687,12 +665,6 @@ class MrnaAssayDataSet(DataSet): # method=method, # returnNumber=returnNumber) - #for step in range(int(n)): - #temp = [] - #sample_ids_step = sample_ids[step*chunk_size:min(len(sample_ids), (step+1)*chunk_size)] - #for item in sample_ids_step: - # temp.append('T%s.value' % item) - temp = ['T%s.value' % item for item in sample_ids_step] query = "SELECT {}.Name,".format(escape(self.type)) data_start_pos = 1 @@ -722,10 +694,10 @@ class MrnaAssayDataSet(DataSet): # trait names and values are lists of sample values for j in range(trait_count): trait_name = trait_sample_data[0][j][0] - for i in range(int(n)): + for i in range(int(number_chunks)): self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:] - + def get_trait_info(self, trait_list=None, species=''): # Note: setting trait_list to [] is probably not a great idea. diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 9b1843bd..aa20eba1 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -39,6 +39,8 @@ import pp import math import collections +import scipy + from pprint import pformat as pf from htmlgen import HTMLgen2 as HT @@ -54,6 +56,7 @@ from utility import webqtlUtil, helper_functions from dbFunction import webqtlDatabaseFunction import utility.webqtlUtil #this is for parallel computing only. from wqflask.correlation import correlationFunction +from utility.benchmark import Bench from pprint import pformat as pf @@ -69,197 +72,6 @@ TISSUE_MOUSE_DB = 1 class AuthException(Exception): pass - -class Trait(object): - - - def __init__(self, name, raw_values = None, lit_corr = None, tissue_corr = None, p_tissue = None): - self.name = name - self.raw_values = raw_values - self.lit_corr = lit_corr - self.tissue_corr = tissue_corr - self.p_tissue = p_tissue - self.correlation = 0 - self.p_value = 0 - - @staticmethod - def from_csv(line, data_start = 1): - name = line[0] - numbers = line[data_start:] - # _log.info(numbers) - numbers = [ float(number) for number in numbers ] - - return Trait(name, raw_values = numbers) - - def calculate_correlation(self, values, method): - """Calculate the correlation value and p value according to the method specified""" - - #ZS: This takes the list of values of the trait our selected trait is being correlated - #against and removes the values of the samples our trait has no value for - #There's probably a better way of dealing with this, but I'll have to ask Christian - updated_raw_values = [] - updated_values = [] - for i in range(len(values)): - if values[i] != "None": - updated_raw_values.append(self.raw_values[i]) - updated_values.append(values[i]) - - self.raw_values = updated_raw_values - values = updated_values - - if method == METHOD_SAMPLE_PEARSON or method == METHOD_LIT or method == METHOD_TISSUE_PEARSON: - corr,nOverlap = webqtlUtil.calCorrelation(self.raw_values, values, len(values)) - else: - corr,nOverlap = webqtlUtil.calCorrelationRank(self.raw_values, values, len(values)) - - self.correlation = corr - self.overlap = nOverlap - - if self.overlap < 3: - self.p_value = 1.0 - else: - #ZS - This is probably the wrong way to deal with this. Correlation values of 1.0 definitely exist (the trait correlated against itself), so zero division needs to br prevented. - if abs(self.correlation) >= 1.0: - self.p_value = 0.0 - else: - #Confirm that this division works after future import - ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation)) - ZValue = ZValue*sqrt(self.overlap-3) - self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) - - - -#XZ, 01/14/2009: This method is for parallel computing only. -#XZ: It is supposed to be called when "Genetic Correlation, Pearson's r" (method 1) -#XZ: or "Genetic Correlation, Spearman's rho" (method 2) is selected -def compute_corr(input_nnCorr, input_trait, input_list, computing_method): - - allcorrelations = [] - for line in input_list: - tokens = line.split('","') - tokens[-1] = tokens[-1][:-2] #remove the last " - tokens[0] = tokens[0][1:] #remove the first " - - traitdataName = tokens[0] - database_trait = tokens[1:] - - if computing_method == "1": #XZ: Pearson's r - corr,nOverlap = utility.webqtlUtil.calCorrelationText(input_trait, database_trait, input_nnCorr) - else: #XZ: Spearman's rho - corr,nOverlap = utility.webqtlUtil.calCorrelationRankText(input_trait, database_trait, input_nnCorr) - traitinfo = [traitdataName,corr,nOverlap] - allcorrelations.append(traitinfo) - - return allcorrelations - -def get_correlation_method_key(form_data): - #XZ, 09/28/2008: if user select "1", then display 1, 3 and 4. - #XZ, 09/28/2008: if user select "2", then display 2, 3 and 5. - #XZ, 09/28/2008: if user select "3", then display 1, 3 and 4. - #XZ, 09/28/2008: if user select "4", then display 1, 3 and 4. - #XZ, 09/28/2008: if user select "5", then display 2, 3 and 5. - - method = form_data.method - if method not in ["1", "2", "3" ,"4", "5"]: - return "1" - - return method - - -def get_custom_trait(form_data, cursor): - """Pulls the custom trait, if it exists, out of the form data""" - trait_name = form_data.fullname - - if trait_name: - trait = webqtlTrait(fullname=trait_name, cursor=cursor) - trait.retrieveInfo() - return trait - else: - return None - - -#XZ, 09/18/2008: get the information such as value, variance of the input strain names from the form. -def get_sample_data(fd): - #print("fd is:", pf(fd.__dict__)) - if fd.allstrainlist: - mdpchoice = fd.MDPChoice - #XZ, in HTML source code, it is "BXD Only", "BXH Only", and so on - if mdpchoice == "1": - strainlist = fd.f1list + fd.strainlist - #XZ, in HTML source code, it is "Non-BXD Only", "Non-BXD Only", etc - elif mdpchoice == "2": - strainlist = [] - strainlist2 = fd.f1list + fd.strainlist - for strain in fd.allstrainlist: - if strain not in strainlist2: - strainlist.append(strain) - #So called MDP Panel - if strainlist: - strainlist = fd.f1list + fd.parlist+strainlist - #XZ, in HTML source code, it is "All Cases" - else: - strainlist = fd.allstrainlist - #XZ, 09/18/2008: put the trait data into dictionary fd.allTraitData - fd.readData(fd.allstrainlist) - else: - mdpchoice = None - strainlist = fd.strainlist - #XZ, 09/18/2008: put the trait data into dictionary fd.allTraitData - fd.readData() - - return strainlist - - - -def get_species(fd, cursor): - #XZ, 3/16/2010: variable RISet must be pass by the form - RISet = fd.RISet - #XZ, 12/12/2008: get species infomation - species = webqtlDatabaseFunction.retrieveSpecies(cursor=cursor, RISet=RISet) - return species - - -def sortTraitCorrelations(traits, method="1"): - if method in TISSUE_METHODS: - traits.sort(key=lambda trait: trait.tissue_corr != None and abs(trait.tissue_corr), reverse=True) - elif method == METHOD_LIT: - traits.sort(key=lambda trait: trait.lit_corr != None and abs(trait.lit_corr), reverse=True) - else: - traits.sort(key=lambda trait: trait.correlation != None and abs(trait.correlation), reverse=True) - - return traits - - -def auth_user_for_db(db, cursor, target_db_name, privilege, username): - """Authorize a user for access to a database if that database is - confidential. A db (identified by a record in ProbeSetFreeze) contains a - list of authorized users who may access it, as well as its confidentiality - level. - - If the current user's privilege level is greater than 'user', ie: root or - admin, then they are automatically authed, otherwise, check the - AuthorizedUsers field for the presence of their name.""" - - if db.type == 'ProbeSet': - cursor.execute('SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name) - indId, indName, indFullName, confidential, AuthorisedUsers = cursor.fetchall()[0] - - if confidential: - authorized = 0 - - #for the dataset that confidentiality is 1 - #1. 'admin' and 'root' can see all of the dataset - #2. 'user' can see the dataset that AuthorisedUsers contains his id(stored in the Id field of User table) - if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']: - authorized = 1 - else: - if username in AuthorisedUsers.split(","): - authorized = 1 - - if not authorized: - raise AuthException("The %s database you selected is not open to the public at this time, please go back and select other database." % indFullName) - - class CorrelationResults(object): corr_min_informative = 4 @@ -287,48 +99,55 @@ class CorrelationResults(object): # cellid=None) #print("start_vars: ", pf(start_vars)) - - helper_functions.get_species_dataset_trait(self, start_vars) - self.dataset.group.read_genotype_file() - - corr_samples_group = start_vars['corr_samples_group'] - - self.sample_data = {} - - #The two if statements below append samples to the sample list based upon whether the user - #selected Primary Samples Only, Other Samples Only, or All Samples - - #If either BXD/whatever Only or All Samples, append all of that group's samplelist - if corr_samples_group != 'samples_other': - self.process_samples(start_vars, self.dataset.group.samplelist, ()) - - #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and - #exclude the primary samples (because they would have been added in the previous - #if statement if the user selected All Samples) - if corr_samples_group != 'samples_primary': + with Bench("Doing correlations"): + helper_functions.get_species_dataset_trait(self, start_vars) + self.dataset.group.read_genotype_file() + + corr_samples_group = start_vars['corr_samples_group'] + + self.sample_data = {} + + #The two if statements below append samples to the sample list based upon whether the user + #rselected Primary Samples Only, Other Samples Only, or All Samples + primary_samples = (self.dataset.group.parlist + self.dataset.group.f1list + self.dataset.group.samplelist) - self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) - self.target_dataset.get_trait_data() - correlation_data = collections.defaultdict(list) - for trait, values in self.target_dataset.trait_data.iteritems(): - values_1 = [] - values_2 = [] - for index,sample in enumerate(self.target_dataset.samplelist): - target_value = values[index] - if sample in self.sample_data.keys(): - this_value = self.sample_data[sample] - values_1.append(this_value) - values_2.append(target_value) - correlation = calCorrelation(values_1, values_2) - correlation_data[trait] = correlation - print ('correlation result: %s %s' % (trait, correlation)) + + #If either BXD/whatever Only or All Samples, append all of that group's samplelist + if corr_samples_group != 'samples_other': + self.process_samples(start_vars, primary_samples, ()) + + #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + #exclude the primary samples (because they would have been added in the previous + #if statement if the user selected All Samples) + if corr_samples_group != 'samples_primary': + self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) + self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset.get_trait_data() + self.correlation_data = {} + for trait, values in self.target_dataset.trait_data.iteritems(): + trait_values = [] + target_values = [] + for index, sample in enumerate(self.target_dataset.samplelist): + target_value = values[index] + if sample in self.sample_data.keys(): + this_value = self.sample_data[sample] + trait_values.append(this_value) + target_values.append(target_value) + (trait_values, target_values) = normalize_values(trait_values, target_values) + correlation = scipy.stats.pearsonr(trait_values, target_values) + #correlation = cal_correlation(trait_values, target_values) + self.correlation_data[trait] = correlation[0] + #print ('correlation result: %s %s' % (trait, correlation)) + + for trait in self.correlation_data: + print("correlation: ", self.correlation_data[trait]) + #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset - self.target_db_name = start_vars['corr_dataset'] + #self.target_db_name = start_vars['corr_dataset'] # Zach said this is ok # Auth if needed @@ -360,396 +179,24 @@ class CorrelationResults(object): # We will not get Literature Correlations if there is no GeneId because there is nothing # to look against - self.geneid = self.this_trait.geneid + #self.geneid = self.this_trait.geneid # We will not get Tissue Correlations if there is no gene symbol because there is nothing to look against #self.trait_symbol = myTrait.symbol #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid - self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.dataset.group.species, self.geneid) + #self.input_trait_mouse_gene_id = self.translateToMouseGeneID(self.dataset.group.species, self.geneid) #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' - self.tissue_probeset_freeze_id = 1 + #self.tissue_probeset_freeze_id = 1 - traitList = self.correlate() + #traitList = self.correlate() - _log.info("Done doing correlation calculation") + #_log.info("Done doing correlation calculation") ############################################################################################################################################ - TD_LR = HT.TD(height=200,width="100%",bgColor='#eeeeee') - - mainfmName = webqtlUtil.genRandStr("fm_") - form = HT.Form(cgi = os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), - enctype='multipart/form-data', name= mainfmName, submit=HT.Input(type='hidden')) - hddn = {'FormID': 'showDatabase', - 'ProbeSetID': '_', - 'database': self.target_db_name, - 'databaseFull': self.db.fullname, - 'CellID': '_', - 'RISet': fd.RISet, - 'identification': fd.identification} - - if myTrait: - hddn['fullname'] = fd.fullname - if mdp_choice: - hddn['MDPChoice']= mdp_choice - - - #XZ, 09/18/2008: pass the trait data to next page by hidden parameters. - webqtlUtil.exportData(hddn, fd.allTraitData) - - if fd.incparentsf1: - hddn['incparentsf1']='ON' - - if fd.allstrainlist: - hddn['allstrainlist'] = string.join(fd.allstrainlist, ' ') - - - for key in hddn.keys(): - form.append(HT.Input(name=key, value=hddn[key], type='hidden')) - - #XZ, 11/21/2008: add two parameters to form - form.append(HT.Input(name="X_geneSymbol", value="", type='hidden')) - form.append(HT.Input(name="Y_geneSymbol", value="", type='hidden')) - - #XZ, 3/11/2010: add one parameter to record if the method is rank order. - form.append(HT.Input(name="rankOrder", value="%s" % rankOrder, type='hidden')) - - form.append(HT.Input(name="TissueProbeSetFreezeId", value="%s" % self.tissue_probeset_freeze_id, type='hidden')) - - #################################### - # generate the info on top of page # - #################################### - - info = self.getTopInfo(myTrait=myTrait, method=self.method, db=self.db, target_db_name=self.target_db_name, returnNumber=self.returnNumber, methodDict=self.CORRELATION_METHODS, totalTraits=traitList, identification=fd.identification ) - - ############## - # Excel file # - ############## - filename= webqtlUtil.genRandStr("Corr_") - xlsUrl = HT.Input(type='button', value = 'Download Table', onClick= "location.href='/tmp/%s.xls'" % filename, Class='button') - # Create a new Excel workbook - workbook = xl.Writer('%s.xls' % (webqtlConfig.TMPDIR+filename)) - headingStyle = workbook.add_format(align = 'center', bold = 1, border = 1, size=13, fg_color = 0x1E, color="white") - - #XZ, 3/18/2010: pay attention to the line number of header in this file. As of today, there are 7 lines. - worksheet = self.createExcelFileWithTitleAndFooter(workbook=workbook, identification=fd.identification, db=self.db, returnNumber=self.returnNumber) - - newrow = 7 - - -##################################################################### - - - #Select All, Deselect All, Invert Selection, Add to Collection - mintmap = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'showIntMap');" % mainfmName) - mintmap_img = HT.Image("/images/multiple_interval_mapping1_final.jpg", name='mintmap', alt="Multiple Interval Mapping", title="Multiple Interval Mapping", style="border:none;") - mintmap.append(mintmap_img) - mcorr = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'compCorr');" % mainfmName) - mcorr_img = HT.Image("/images/compare_correlates2_final.jpg", alt="Compare Correlates", title="Compare Correlates", style="border:none;") - mcorr.append(mcorr_img) - cormatrix = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'corMatrix');" % mainfmName) - cormatrix_img = HT.Image("/images/correlation_matrix1_final.jpg", alt="Correlation Matrix and PCA", title="Correlation Matrix and PCA", style="border:none;") - cormatrix.append(cormatrix_img) - networkGraph = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'networkGraph');" % mainfmName) - networkGraph_img = HT.Image("/images/network_graph1_final.jpg", name='mintmap', alt="Network Graphs", title="Network Graphs", style="border:none;") - networkGraph.append(networkGraph_img) - heatmap = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'heatmap');" % mainfmName) - heatmap_img = HT.Image("/images/heatmap2_final.jpg", name='mintmap', alt="QTL Heat Map and Clustering", title="QTL Heatmap and Clustering", style="border:none;") - heatmap.append(heatmap_img) - partialCorr = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'partialCorrInput');" % mainfmName) - partialCorr_img = HT.Image("/images/partial_correlation_final.jpg", name='partialCorr', alt="Partial Correlation", title="Partial Correlation", style="border:none;") - partialCorr.append(partialCorr_img) - addselect = HT.Href(url="#redirect", onClick="addRmvSelection('%s', document.getElementsByName('%s')[0], 'addToSelection');" % (fd.RISet, mainfmName)) - addselect_img = HT.Image("/images/add_collection1_final.jpg", name="addselect", alt="Add To Collection", title="Add To Collection", style="border:none;") - addselect.append(addselect_img) - selectall = HT.Href(url="#redirect", onClick="checkAll(document.getElementsByName('%s')[0]);" % mainfmName) - selectall_img = HT.Image("/images/select_all2_final.jpg", name="selectall", alt="Select All", title="Select All", style="border:none;") - selectall.append(selectall_img) - selectinvert = HT.Href(url="#redirect", onClick = "checkInvert(document.getElementsByName('%s')[0]);" % mainfmName) - selectinvert_img = HT.Image("/images/invert_selection2_final.jpg", name="selectinvert", alt="Invert Selection", title="Invert Selection", style="border:none;") - selectinvert.append(selectinvert_img) - reset = HT.Href(url="#redirect", onClick="checkNone(document.getElementsByName('%s')[0]); return false;" % mainfmName) - reset_img = HT.Image("/images/select_none2_final.jpg", alt="Select None", title="Select None", style="border:none;") - reset.append(reset_img) - selecttraits = HT.Input(type='button' ,name='selecttraits',value='Select Traits', onClick="checkTraits(this.form);",Class="button") - selectgt = HT.Input(type='text' ,name='selectgt',value='-1.0', size=6,maxlength=10,onChange="checkNumeric(this,1.0,'-1.0','gthan','greater than filed')") - selectlt = HT.Input(type='text' ,name='selectlt',value='1.0', size=6,maxlength=10,onChange="checkNumeric(this,-1.0,'1.0','lthan','less than field')") - selectandor = HT.Select(name='selectandor') - selectandor.append(('AND','and')) - selectandor.append(('OR','or')) - selectandor.selected.append('AND') - - - #External analysis tools - GCATButton = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'GCAT');" % mainfmName) - GCATButton_img = HT.Image("/images/GCAT_logo_final.jpg", name="GCAT", alt="GCAT", title="GCAT", style="border:none") - GCATButton.append(GCATButton_img) - - ODE = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'ODE');" % mainfmName) - ODE_img = HT.Image("/images/ODE_logo_final.jpg", name="ode", alt="ODE", title="ODE", style="border:none") - ODE.append(ODE_img) - - ''' - #XZ, 07/07/2010: I comment out this block of code. - WebGestaltScript = HT.Script(language="Javascript") - WebGestaltScript.append(""" -setTimeout('openWebGestalt()', 2000); -function openWebGestalt(){ -var thisForm = document['WebGestalt']; -makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); -} - """ % (mainfmName, len(traitList))) - ''' - - self.cursor.execute('SELECT GeneChip.GO_tree_value FROM GeneChip, ProbeFreeze, ProbeSetFreeze WHERE GeneChip.Id = ProbeFreeze.ChipId and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and ProbeSetFreeze.Name = "%s"' % self.db.name) - result = self.cursor.fetchone() - - if result: - GO_tree_value = result[0] - - if GO_tree_value: - - WebGestalt = HT.Href(url="#redirect", onClick="databaseFunc(document.getElementsByName('%s')[0], 'GOTree');" % mainfmName) - WebGestalt_img = HT.Image("/images/webgestalt_icon_final.jpg", name="webgestalt", alt="Gene Set Analysis Toolkit", title="Gene Set Analysis Toolkit", style="border:none") - WebGestalt.append(WebGestalt_img) - - hddnWebGestalt = { - 'id_list':'', - 'correlation':'', - 'id_value':'', - 'llid_list':'', - 'id_type':GO_tree_value, - 'idtype':'', - 'species':'', - 'list':'', - 'client':''} - - hddnWebGestalt['ref_type'] = hddnWebGestalt['id_type'] - hddnWebGestalt['cat_type'] = 'GO' - hddnWebGestalt['significancelevel'] = 'Top10' - - if self.species == 'rat': - hddnWebGestalt['org'] = 'Rattus norvegicus' - elif self.species == 'human': - hddnWebGestalt['org'] = 'Homo sapiens' - elif self.species == 'mouse': - hddnWebGestalt['org'] = 'Mus musculus' - else: - hddnWebGestalt['org'] = '' - - for key in hddnWebGestalt.keys(): - form.append(HT.Input(name=key, value=hddnWebGestalt[key], type='hidden')) - - - #Create tables with options, etc - - pageTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="100%", border=0, align="Left") - - containerTable = HT.TableLite(cellSpacing=0,cellPadding=0,width="90%",border=0, align="Left") - - - if not GO_tree_value: - optionsTable = HT.TableLite(cellSpacing=2, cellPadding=0,width="480", height="80", border=0, align="Left") - optionsTable.append(HT.TR(HT.TD(selectall), HT.TD(reset), HT.TD(selectinvert), HT.TD(addselect), HT.TD(GCATButton), HT.TD(ODE), align="left")) - optionsTable.append(HT.TR(HT.TD(" "*1,"Select"), HT.TD("Deselect"), HT.TD(" "*1,"Invert"), HT.TD(" "*3,"Add"), HT.TD("Gene Set"), HT.TD(" "*2,"GCAT"))) - else: - optionsTable = HT.TableLite(cellSpacing=2, cellPadding=0,width="560", height="80", border=0, align="Left") - optionsTable.append(HT.TR(HT.TD(selectall), HT.TD(reset), HT.TD(selectinvert), HT.TD(addselect), HT.TD(GCATButton), HT.TD(ODE), HT.TD(WebGestalt), align="left")) - optionsTable.append(HT.TR(HT.TD(" "*1,"Select"), HT.TD("Deselect"), HT.TD(" "*1,"Invert"), HT.TD(" "*3,"Add"), HT.TD("Gene Set"), HT.TD(" "*2,"GCAT"), HT.TD(" "*3, "ODE"))) - containerTable.append(HT.TR(HT.TD(optionsTable))) - - functionTable = HT.TableLite(cellSpacing=2,cellPadding=0,width="480",height="80", border=0, align="Left") - functionRow = HT.TR(HT.TD(networkGraph, width="16.7%"), HT.TD(cormatrix, width="16.7%"), HT.TD(partialCorr, width="16.7%"), HT.TD(mcorr, width="16.7%"), HT.TD(mintmap, width="16.7%"), HT.TD(heatmap), align="left") - labelRow = HT.TR(HT.TD(" "*1,HT.Text("Graph")), HT.TD(" "*1,HT.Text("Matrix")), HT.TD(" "*1,HT.Text("Partial")), HT.TD(HT.Text("Compare")), HT.TD(HT.Text("QTL Map")), HT.TD(HT.Text(text="Heat Map"))) - functionTable.append(functionRow, labelRow) - containerTable.append(HT.TR(HT.TD(functionTable), HT.BR())) - - #more_options = HT.Image("/images/more_options1_final.jpg", name='more_options', alt="Expand Options", title="Expand Options", style="border:none;", Class="toggleShowHide") - - #containerTable.append(HT.TR(HT.TD(more_options, HT.BR(), HT.BR()))) - - moreOptions = HT.Input(type='button',name='options',value='More Options', onClick="",Class="toggle") - fewerOptions = HT.Input(type='button',name='options',value='Fewer Options', onClick="",Class="toggle") - - """ - if (fd.formdata.getvalue('showHideOptions') == 'less'): - containerTable.append(HT.TR(HT.TD(" "), height="10"), HT.TR(HT.TD(HT.Div(fewerOptions, Class="toggleShowHide")))) - containerTable.append(HT.TR(HT.TD(" "))) - else: - containerTable.append(HT.TR(HT.TD(" "), height="10"), HT.TR(HT.TD(HT.Div(moreOptions, Class="toggleShowHide")))) - containerTable.append(HT.TR(HT.TD(" "))) - """ - - containerTable.append(HT.TR(HT.TD(HT.Span(selecttraits,' with r > ',selectgt, ' ',selectandor, ' r < ',selectlt,Class="bd1 cbddf fs11")), style="display:none;", Class="extra_options")) - - chrMenu = HT.Input(type='hidden',name='chromosomes',value='all') - - corrHeading = HT.Paragraph('Correlation Table', Class="title") - - - tblobj = {} - - if self.db.type=="Geno": - containerTable.append(HT.TR(HT.TD(xlsUrl, height=60))) - - pageTable.append(HT.TR(HT.TD(containerTable))) - - tblobj['header'], worksheet = self.getTableHeaderForGeno( method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) - newrow += 1 - - sortby = self.getSortByValue( calculationMethod = self.method ) - - corrScript = HT.Script(language="Javascript") - corrScript.append("var corrArray = new Array();") - - tblobj['body'], worksheet, corrScript = self.getTableBodyForGeno(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript) - - workbook.close() - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - - div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), corrScript, Id="sortable") - - pageTable.append(HT.TR(HT.TD(div))) - - form.append(HT.Input(name='ShowStrains',type='hidden', value =1), - HT.Input(name='ShowLine',type='hidden', value =1), - HT.P(), HT.P(), pageTable) - TD_LR.append(corrHeading, info, form, HT.P()) - - self.dict['body'] = str(TD_LR) - self.dict['js1'] = '' - self.dict['title'] = 'Correlation' - - elif self.db.type=="Publish": - - containerTable.append(HT.TR(HT.TD(xlsUrl, height=40))) - - pageTable.append(HT.TR(HT.TD(containerTable))) - - tblobj['header'], worksheet = self.getTableHeaderForPublish(method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) - newrow += 1 - - sortby = self.getSortByValue( calculationMethod = self.method ) - - corrScript = HT.Script(language="Javascript") - corrScript.append("var corrArray = new Array();") - - tblobj['body'], worksheet, corrScript = self.getTableBodyForPublish(traitList=traitList, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript, species=self.species) - - workbook.close() - - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - # NL, 07/27/2010. genTableObj function has been moved from templatePage.py to webqtlUtil.py; - div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1"), corrScript, Id="sortable") - - pageTable.append(HT.TR(HT.TD(div))) - - form.append( - HT.Input(name='ShowStrains',type='hidden', value =1), - HT.Input(name='ShowLine',type='hidden', value =1), - HT.P(), pageTable) - TD_LR.append(corrHeading, info, form, HT.P()) - - self.dict['body'] = str(TD_LR) - self.dict['js1'] = '' - self.dict['title'] = 'Correlation' - - - elif self.db.type=="ProbeSet": - tblobj['header'], worksheet = self.getTableHeaderForProbeSet(method=self.method, worksheet=worksheet, newrow=newrow, headingStyle=headingStyle) - newrow += 1 - - sortby = self.getSortByValue( calculationMethod = self.method ) - - corrScript = HT.Script(language="Javascript") - corrScript.append("var corrArray = new Array();") - - tblobj['body'], worksheet, corrScript = self.getTableBodyForProbeSet(traitList=traitList, primaryTrait=myTrait, formName=mainfmName, worksheet=worksheet, newrow=newrow, corrScript=corrScript, species=self.species) - - workbook.close() - objfile = open('%s.obj' % (webqtlConfig.TMPDIR+filename), 'wb') - cPickle.dump(tblobj, objfile) - objfile.close() - - #XZ: here is the table of traits - div = HT.Div(webqtlUtil.genTableObj(tblobj=tblobj, file=filename, sortby=sortby, tableID = "sortable", addIndex = "1", hiddenColumns=["Gene ID","Homologene ID"]), corrScript, Id="sortable") - - - #XZ, 01/12/2009: create database menu for 'Add Correlation' - self.cursor.execute(""" - select - ProbeSetFreeze.FullName, ProbeSetFreeze.Id, Tissue.name - from - ProbeSetFreeze, ProbeFreeze, ProbeSetFreeze as ps2, ProbeFreeze as p2, Tissue - where - ps2.Id = %d - and ps2.ProbeFreezeId = p2.Id - and ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id - and (ProbeFreeze.InbredSetId = p2.InbredSetId or (ProbeFreeze.InbredSetId in (1, 3) and p2.InbredSetId in (1, 3))) - and p2.ChipId = ProbeFreeze.ChipId - and ps2.Id != ProbeSetFreeze.Id - and ProbeFreeze.TissueId = Tissue.Id - and ProbeSetFreeze.public > %d - order by - ProbeFreeze.TissueId, ProbeSetFreeze.CreateTime desc - """ % (self.db.id, webqtlConfig.PUBLICTHRESH)) - - results = self.cursor.fetchall() - dbCustomizer = HT.Select(results, name = "customizer") - databaseMenuSub = preTissue = "" - for item in results: - TName, TId, TTissue = item - if TTissue != preTissue: - if databaseMenuSub: - dbCustomizer.append(databaseMenuSub) - databaseMenuSub = HT.Optgroup(label = '%s mRNA ------' % TTissue) - preTissue = TTissue - - databaseMenuSub.append(item[:2]) - if databaseMenuSub: - dbCustomizer.append(databaseMenuSub) - - #updated by NL. Delete function generateJavaScript, move js files to dhtml.js, webqtl.js and jqueryFunction.js - #variables: filename, strainIds and vals are required by getquerystring function - strainIds=self.getStrainIds(species=self.species, strains=self.sample_names) - var1 = HT.Input(name="filename", value=filename, type='hidden') - var2 = HT.Input(name="strainIds", value=strainIds, type='hidden') - var3 = HT.Input(name="vals", value=vals, type='hidden') - customizerButton = HT.Input(type="button", Class="button", value="Add Correlation", onClick = "xmlhttpPost('%smain.py?FormID=AJAX_table', 'sortable', (getquerystring(this.form)))" % webqtlConfig.CGIDIR) - - containerTable.append(HT.TR(HT.TD(HT.Span(var1,var2,var3,customizerButton, "with", dbCustomizer, Class="bd1 cbddf fs11"), HT.BR(), HT.BR()), style="display:none;", Class="extra_options")) - - containerTable.append(HT.TR(HT.TD(xlsUrl, HT.BR(), HT.BR()))) - - pageTable.append(HT.TR(HT.TD(containerTable))) - - pageTable.append(HT.TR(HT.TD(div))) - - if self.species == 'human': - heatmap = "" - - form.append(HT.Input(name='ShowStrains',type='hidden', value =1), - HT.Input(name='ShowLine',type='hidden', value =1), - info, HT.BR(), pageTable, HT.BR()) - - TD_LR.append(corrHeading, form, HT.P()) - - - self.dict['body'] = str(TD_LR) - self.dict['title'] = 'Correlation' - # updated by NL. Delete function generateJavaScript, move js files to dhtml.js, webqtl.js and jqueryFunction.js - self.dict['js1'] = '' - self.dict['js2'] = 'onLoad="pageOffset()"' - self.dict['layer'] = self.generateWarningLayer() - else: - self.dict['body'] = "" def get_all_dataset_data(self): @@ -783,21 +230,6 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id """ - - #def process_samples(self, start_vars, sample_names, excluded_samples): - # for sample in sample_names: - # if sample not in excluded_samples: - # value = start_vars['value:' + sample] - # variance = start_vars['variance:' + sample] - # if variance.strip().lower() == 'x': - # variance = 0 - # else: - # variance = float(variance) - # if value.strip().lower() != 'x': - # self.samples.append(str(sample)) - # self.vals.append(float(value)) - # #self.variances.append(variance) - def process_samples(self, start_vars, sample_names, excluded_samples): for sample in sample_names: if sample not in excluded_samples: @@ -807,87 +239,6 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php'); else: self.sample_data[str(sample)] = float(value) - def getSortByValue(self, calculationMethod): - - if calculationMethod == "1": - sortby = ("Sample p(r)", "up") - elif calculationMethod == "2": - sortby = ("Sample p(rho)", "up") - elif calculationMethod == "3": #XZ: literature correlation - sortby = ("Lit Corr","down") - elif calculationMethod == "4": #XZ: tissue correlation - sortby = ("Tissue r", "down") - elif calculationMethod == "5": - sortby = ("Tissue rho", "down") - - return sortby - - - - def generateWarningLayer(self): - - layerString = """ - - - - - """ - - return layerString - - - #XZ, 01/07/2009: In HTML code, the variable 'database' corresponds to the column 'Name' in database table. - def getFileName(self, target_db_name): ### dcrowell August 2008 - """Returns the name of the reference database file with which correlations are calculated. - Takes argument cursor which is a cursor object of any instance of a subclass of templatePage - Used by correlationPage""" - - trait_id, full_name = g.db.execute("""SELECT Id, FullName - FROM ProbeSetFreeze - WHERE Name = '%s'""" % target_db_name).fetchone() - for char in [' ', '/']: - full_name = full_name.replace(char, '_') - - file_name = 'ProbeSetFreezeId_' + str(trait_id) + '_FullName_' + full_name + '.txt' - - return file_name - - - - #XZ, 01/29/2009: I modified this function. - #XZ: Note that the type of StrainIds must be number, not string. - def getStrainIds(self, species=None, strains=[]): - StrainIds = [] - for item in strains: - self.cursor.execute('''SELECT Strain.Id FROM Strain, Species WHERE - Strain.Name="%s" and Strain.SpeciesId=Species.Id and Species.name = "%s" ''' % (item, species)) - Id = self.cursor.fetchone()[0] - StrainIds.append(Id) - - return StrainIds - #XZ, 12/12/2008: if the species is rat or human, translate the geneid to mouse geneid #XZ, 12/12/2008: if the input geneid is 'None', return 0 @@ -921,26 +272,26 @@ Resorting this table
return mouse_geneid - #XZ, 12/16/2008: the input geneid is of mouse type - def checkForLitInfo(self,geneId): - q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId - self.cursor.execute(q) - try: - x = self.cursor.fetchone() - if x: return True - else: raise - except: return False + ##XZ, 12/16/2008: the input geneid is of mouse type + #def checkForLitInfo(self,geneId): + # q = 'SELECT 1 FROM LCorrRamin3 WHERE GeneId1=%s LIMIT 1' % geneId + # self.cursor.execute(q) + # try: + # x = self.cursor.fetchone() + # if x: return True + # else: raise + # except: return False - #XZ, 12/16/2008: the input geneid is of mouse type - def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): - q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) - self.cursor.execute(q) - try: - x = self.cursor.fetchone() - if x: return True - else: raise - except: return False + ##XZ, 12/16/2008: the input geneid is of mouse type + #def checkSymbolForTissueCorr(self, tissueProbeSetFreezeId=0, symbol=""): + # q = "SELECT 1 FROM TissueProbeSetXRef WHERE TissueProbeSetFreezeId=%s and Symbol='%s' LIMIT 1" % (tissueProbeSetFreezeId,symbol) + # self.cursor.execute(q) + # try: + # x = self.cursor.fetchone() + # if x: return True + # else: raise + # except: return False def fetchAllDatabaseData(self, species, GeneId, GeneSymbol, strains, db, method, returnNumber, tissueProbeSetFreezeId): @@ -1247,7 +598,6 @@ Resorting this table
#Todo: Redo cached stuff using memcached if False: - _log.info("Using the fast method because the file exists") lit_corrs = {} tissue_corrs = {} use_lit = False @@ -1366,10 +716,24 @@ Resorting this table
datasetFile.close() totalTraits = len(allcorrelations) _log.info("Done correlating using the fast method") - + def correlate(self): + self.correlation_data = collections.defaultdict(list) + for trait, values in self.target_dataset.trait_data.iteritems(): + values_1 = [] + values_2 = [] + for index,sample in enumerate(self.target_dataset.samplelist): + target_value = values[index] + if sample in self.sample_data.keys(): + this_value = self.sample_data[sample] + values_1.append(this_value) + values_2.append(target_value) + correlation = calCorrelation(values_1, values_2) + self.correlation_data[trait] = correlation + print ('correlation result: %s %s' % (trait, correlation)) + """ correlations = [] #XZ: Use the fast method only for probeset dataset, and this dataset must have been created. @@ -1466,6 +830,7 @@ Resorting this table
method=self.method) return trait_list + """ def calculateCorrOfAllTissueTrait(self, primaryTraitSymbol=None, TissueProbeSetFreezeId=None, method=None): @@ -1523,55 +888,6 @@ Resorting this table
return traitList - def getTopInfo(self, myTrait=None, method=None, db=None, target_db_name=None, returnNumber=None, methodDict=None, totalTraits=None, identification=None ): - - if myTrait: - if method in ["1","2"]: #genetic correlation - info = HT.Paragraph("Values of Record %s in the " % myTrait.getGivenName(), HT.Href(text=myTrait.db.fullname,url=webqtlConfig.INFOPAGEHREF % myTrait.db.name,target="_blank", Class="fwn"), - " database were compared to all %d records in the " % self.record_count, HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank", Class="fwn"), - ' database. The top %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.') - else: - #myTrait.retrieveInfo()#need to know geneid and symbol - if method == "3":#literature correlation - searchDBName = "Literature Correlation" - searchDBLink = "/correlationAnnotation.html#literatureCorr" - else: #tissue correlation - searchDBName = "Tissue Correlation" - searchDBLink = "/correlationAnnotation.html#tissueCorr" - info = HT.Paragraph("Your input record %s in the " % myTrait.getGivenName(), HT.Href(text=myTrait.db.fullname,url=webqtlConfig.INFOPAGEHREF % myTrait.db.name,target="_blank", Class="fwn"), - " database corresponds to ", - HT.Href(text='gene Id %s, and gene symbol %s' % (myTrait.geneid, myTrait.symbol), target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % myTrait.geneid, Class="fs12 fwn"), - '. GN ranked all genes in the ', HT.Href(text=searchDBName,url=searchDBLink,target="_blank", Class="fwn"),' database by the %s.' % methodDict[method], - ' The top %d probes or probesets in the ' % returnNumber, HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank", Class="fwn"), - ' database corresponding to the top genes ranked by the %s are displayed.' %( methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.' ) - - elif identification: - info = HT.Paragraph('Values of %s were compared to all %d traits in ' % (identification, self.record_count), - HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank",Class="fwn"), - ' database. The TOP %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.') - - else: - info = HT.Paragraph('Trait values were compared to all values in ', - HT.Href(text=db.fullname,url=webqtlConfig.INFOPAGEHREF % target_db_name,target="_blank",Class="fwn"), - ' database. The TOP %d correlations ranked by the %s are displayed.' % (returnNumber,methodDict[method]), - ' You can resort this list using the small arrowheads in the top row.') - - if db.type=="Geno": - info.append(HT.BR(),HT.BR(),'Clicking on the Locus will open the genotypes data for that locus. Click on the correlation to see a scatter plot of the trait data.') - elif db.type=="Publish": - info.append(HT.BR(),HT.BR(),'Clicking on the record ID will open the published phenotype data for that publication. Click on the correlation to see a scatter plot of the trait data. ') - elif db.type=="ProbeSet": - info.append(HT.BR(),'Click the correlation values to generate scatter plots. Select the Record ID to open the Trait Data and Analysis form. Select the symbol to open NCBI Entrez.') - else: - pass - - - return info - - def createExcelFileWithTitleAndFooter(self, workbook=None, identification=None, db=None, returnNumber=None): worksheet = workbook.add_worksheet() @@ -1691,463 +1007,19 @@ Resorting this table
return tblobj_body, worksheet, corrScript - - def getTableHeaderForPublish(self, method=None, worksheet=None, newrow=None, headingStyle=None): - - tblobj_header = [] - - if method in ["1","3","4"]: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Record ID", idx=1), - THCell(HT.TD('Phenotype', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Phenotype", idx=2), - THCell(HT.TD('Authors', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Authors", idx=3), - THCell(HT.TD('Year', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Year", idx=4), - THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS", idx=5), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS Location", idx=6), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=7), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=8), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=9)]] - - for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)", "Sample r", "N Cases", "Sample p(r)"]): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - else: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Record ID", idx=1), - THCell(HT.TD('Phenotype', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Phenotype", idx=2), - THCell(HT.TD('Authors', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Authors", idx=3), - THCell(HT.TD('Year', HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Year", idx=4), - THCell(HT.TD('Max',HT.BR(), 'LRS', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS", idx=5), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap="on"), text="Max LRS Location", idx=6), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=7), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=8), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=9)]] - - for ncol, item in enumerate(["Record", "Phenotype", "Authors", "Year", "Pubmed Id", "Max LRS", "Max LRS Location (Chr: Mb)", "Sample rho", "N Cases", "Sample p(rho)"]): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - - - return tblobj_header, worksheet - - - def getTableBodyForPublish(self, traitList, formName=None, worksheet=None, newrow=None, corrScript=None, species=''): - - tblobj_body = [] - - for thisTrait in traitList: - tr = [] - - trId = str(thisTrait) - - corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) - - tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) - - tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn"), nowrap="yes",align="center", Class="fs12 fwn b1 c222"),str(thisTrait.name), thisTrait.name)) - - PhenotypeString = thisTrait.post_publication_description - if thisTrait.confidential: - if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=thisTrait.authorized_users): - PhenotypeString = thisTrait.pre_publication_description - - tr.append(TDCell(HT.TD(PhenotypeString, Class="fs12 fwn b1 c222"), PhenotypeString, PhenotypeString.upper())) - - tr.append(TDCell(HT.TD(thisTrait.authors, Class="fs12 fwn b1 c222 fsI"),thisTrait.authors, thisTrait.authors.strip().upper())) - - try: - PubMedLinkText = myear = repr = int(thisTrait.year) - except: - PubMedLinkText = repr = "--" - myear = 0 - if thisTrait.pubmed_id: - PubMedLink = HT.Href(text= repr,url= webqtlConfig.PUBMEDLINK_URL % thisTrait.pubmed_id,target='_blank', Class="fs12 fwn") - else: - PubMedLink = repr - - tr.append(TDCell(HT.TD(PubMedLink, Class="fs12 fwn b1 c222", align='center'), repr, myear)) - - #LRS and its location - LRS_score_repr = '--' - LRS_score_value = 0 - LRS_location_repr = '--' - LRS_location_value = 1000000 - LRS_flag = 1 - - #Max LRS and its Locus location - if thisTrait.lrs and thisTrait.locus: - self.cursor.execute(""" - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, thisTrait.locus)) - result = self.cursor.fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - - - LRS_score_repr = '%3.1f' % thisTrait.lrs - LRS_score_value = thisTrait.lrs - LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) ) - LRS_flag = 0 - - #tr.append(TDCell(HT.TD(HT.Href(text=LRS_score_repr,url="javascript:showIntervalMapping('%s', '%s : %s')" % (formName, thisTrait.db.shortname, thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn ffl b1 c222", align='right', nowrap="on"),LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222", align='right', nowrap="on"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) - - if LRS_flag: - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) - - repr = '%3.4f' % thisTrait.corr - tr.append(TDCell(HT.TD(HT.Href(text=repr,url="javascript:showCorrPlot('%s', '%s')" % (formName,thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn b1 c222", align='right',nowrap="on"), repr, abs(thisTrait.corr))) - - repr = '%d' % thisTrait.nOverlap - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.nOverlap)) - - repr = webqtlUtil.SciFloat(thisTrait.corrPValue) - tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) - - tblobj_body.append(tr) - - for ncol, item in enumerate([thisTrait.name, PhenotypeString, thisTrait.authors, thisTrait.year, thisTrait.pubmed_id, LRS_score_repr, LRS_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue]): - worksheet.write([newrow, ncol], item) - newrow += 1 - - return tblobj_body, worksheet, corrScript - - - def getTableHeaderForProbeSet(self, method=None, worksheet=None, newrow=None, headingStyle=None): - - tblobj_header = [] - - if method in ["1","3","4"]: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Record ID", idx=1), - THCell(HT.TD('Gene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Gene ID", idx=2), - THCell(HT.TD('Homologene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Homologene ID", idx=3), - THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Symbol", idx=4), - THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Description", idx=5), - THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Location (Chr: Mb)", idx=6), - THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Mean Expr", idx=7), - THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS", idx=8), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS Location (Chr: Mb)", idx=9), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample r", idx=10), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=11), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(r)", idx=12), - THCell(HT.TD(HT.Href( - text = HT.Span('Lit',HT.BR(), 'Corr', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#literatureCorr"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Lit Corr", idx=13), - #XZ, 09/22/2008: tissue correlation - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'r', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue r", idx=14), - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'p(r)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue p(r)", idx=15)]] - - for ncol, item in enumerate(['Record', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr: Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)', 'Sample r', 'N Cases', 'Sample p(r)', 'Lit Corr', 'Tissue r', 'Tissue p(r)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - else: - tblobj_header = [[THCell(HT.TD(' ', Class="fs13 fwb ffl b1 cw cbrb",nowrap='ON'), sort=0), - THCell(HT.TD('Record',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Record ID", idx=1), - THCell(HT.TD('Gene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Gene ID", idx=2), - THCell(HT.TD('Homologene',HT.BR(), 'ID',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Homologene ID", idx=3), - THCell(HT.TD('Symbol',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Symbol", idx=4), - THCell(HT.TD('Description',HT.BR(),HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Description", idx=5), - THCell(HT.TD('Location',HT.BR(), 'Chr and Mb', HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Location (Chr: Mb)", idx=6), - THCell(HT.TD('Mean',HT.BR(),'Expr',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="Mean Expr", idx=7), - THCell(HT.TD('Max',HT.BR(),'LRS',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS", idx=8), - THCell(HT.TD('Max LRS Location',HT.BR(),'Chr and Mb',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Max LRS Location (Chr: Mb)", idx=9), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample rho", idx=10), - THCell(HT.TD('N',HT.BR(),'Cases',HT.BR(), Class="fs13 fwb ffl b1 cw cbrb"), text="N Cases", idx=11), - THCell(HT.TD(HT.Href( - text = HT.Span('Sample',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#genetic_p_rho"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Sample p(rho)", idx=12), - THCell(HT.TD(HT.Href( - text = HT.Span('Lit',HT.BR(), 'Corr', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#literatureCorr"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Lit Corr", idx=13), - #XZ, 09/22/2008: tissue correlation - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'rho', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue rho", idx=14), - THCell(HT.TD(HT.Href( - text = HT.Span('Tissue',HT.BR(), 'p(rho)', HT.Sup(' ?', style="color:#f00"),HT.BR(), Class="fs13 fwb ffl cw"), - target = '_blank', - url = "/correlationAnnotation.html#tissue_p_r"), - Class="fs13 fwb ffl b1 cw cbrb", nowrap='ON'), text="Tissue p(rho)", idx=15)]] - - for ncol, item in enumerate(['Record ID', 'Gene ID', 'Homologene ID', 'Symbol', 'Description', 'Location (Chr: Mb)', 'Mean Expr', 'Max LRS', 'Max LRS Location (Chr: Mb)', 'Sample rho', 'N Cases', 'Sample p(rho)', 'Lit Corr', 'Tissue rho', 'Tissue p(rho)']): - worksheet.write([newrow, ncol], item, headingStyle) - worksheet.set_column([ncol, ncol], 2*len(item)) - - return tblobj_header, worksheet - - - def getTableBodyForProbeSet(self, traitList=[], primaryTrait=None, formName=None, worksheet=None, newrow=None, corrScript=None, species=''): - - tblobj_body = [] - - for thisTrait in traitList: - - if thisTrait.symbol: - pass - else: - thisTrait.symbol = "--" - - if thisTrait.geneid: - symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" % thisTrait.geneid, Class="fs12 fwn") - else: - symbolurl = HT.Href(text=thisTrait.symbol,target='_blank',url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=search&DB=gene&term=%s" % thisTrait.symbol, Class="fs12 fwn") - - tr = [] - - trId = str(thisTrait) - - corrScript.append('corrArray["%s"] = {corr:%1.4f};' % (trId, thisTrait.corr)) - - #XZ, 12/08/2008: checkbox - tr.append(TDCell(HT.TD(HT.Input(type="checkbox", Class="checkbox", name="searchResult",value=trId, onClick="highlight(this)"), nowrap="on", Class="fs12 fwn ffl b1 c222"), text=trId)) - - #XZ, 12/08/2008: probeset name - tr.append(TDCell(HT.TD(HT.Href(text=thisTrait.name,url="javascript:showTrait('%s', '%s')" % (formName,thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn b1 c222"), thisTrait.name, thisTrait.name.upper())) - - #XZ, 12/08/2008: gene id - if thisTrait.geneid: - tr.append(TDCell(None, thisTrait.geneid, val=999)) - else: - tr.append(TDCell(None, thisTrait.geneid, val=999)) - - #XZ, 12/08/2008: homologene id - if thisTrait.homologeneid: - tr.append(TDCell("", thisTrait.homologeneid, val=999)) - else: - tr.append(TDCell("", thisTrait.homologeneid, val=999)) - - #XZ, 12/08/2008: gene symbol - tr.append(TDCell(HT.TD(symbolurl, Class="fs12 fwn b1 c222 fsI"),thisTrait.symbol, thisTrait.symbol.upper())) - - #XZ, 12/08/2008: description - #XZ, 06/05/2009: Rob asked to add probe target description - description_string = str(thisTrait.description).strip() - target_string = str(thisTrait.probe_target_description).strip() - - description_display = '' - - if len(description_string) > 1 and description_string != 'None': - description_display = description_string - else: - description_display = thisTrait.symbol - - if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': - description_display = description_display + '; ' + target_string.strip() - - tr.append(TDCell(HT.TD(description_display, Class="fs12 fwn b1 c222"), description_display, description_display)) - - #XZ: trait_location_value is used for sorting - trait_location_repr = '--' - trait_location_value = 1000000 - - if thisTrait.chr and thisTrait.mb: - try: - trait_location_value = int(thisTrait.chr)*1000 + thisTrait.mb - except: - if thisTrait.chr.upper() == 'X': - trait_location_value = 20*1000 + thisTrait.mb - else: - trait_location_value = ord(str(thisTrait.chr).upper()[0])*1000 + thisTrait.mb - - trait_location_repr = 'Chr%s: %.6f' % (thisTrait.chr, float(thisTrait.mb) ) - - tr.append(TDCell(HT.TD(trait_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), trait_location_repr, trait_location_value)) - - """ - #XZ, 12/08/2008: chromosome number - #XZ, 12/10/2008: use Mbvalue to sort chromosome - tr.append(TDCell( HT.TD(thisTrait.chr, Class="fs12 fwn b1 c222", align='right'), thisTrait.chr, Mbvalue) ) - - #XZ, 12/08/2008: Rob wants 6 digit precision, and we have to deal with that the mb could be None - if not thisTrait.mb: - tr.append(TDCell(HT.TD(thisTrait.mb, Class="fs12 fwn b1 c222",align='right'), thisTrait.mb, Mbvalue)) - else: - tr.append(TDCell(HT.TD('%.6f' % thisTrait.mb, Class="fs12 fwn b1 c222", align='right'), thisTrait.mb, Mbvalue)) - """ - - - - #XZ, 01/12/08: This SQL query is much faster. - self.cursor.execute(""" - select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %d and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' - """ % (thisTrait.db.id, thisTrait.name)) - result = self.cursor.fetchone() - if result: - if result[0]: - mean = result[0] - else: - mean=0 - else: - mean = 0 - - #XZ, 06/05/2009: It is neccessary to turn on nowrap - repr = "%2.3f" % mean - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right', nowrap='ON'),repr, mean)) - - #LRS and its location - LRS_score_repr = '--' - LRS_score_value = 0 - LRS_location_repr = '--' - LRS_location_value = 1000000 - LRS_flag = 1 - - #Max LRS and its Locus location - if thisTrait.lrs and thisTrait.locus: - self.cursor.execute(""" - select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and - Geno.SpeciesId = Species.Id - """ % (species, thisTrait.locus)) - result = self.cursor.fetchone() - - if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) - - - LRS_score_repr = '%3.1f' % thisTrait.lrs - LRS_score_value = thisTrait.lrs - LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb) ) - LRS_flag = 0 - - #tr.append(TDCell(HT.TD(HT.Href(text=LRS_score_repr,url="javascript:showIntervalMapping('%s', '%s : %s')" % (formName, thisTrait.db.shortname, thisTrait.name), Class="fs12 fwn"), Class="fs12 fwn ffl b1 c222", align='right', nowrap="on"),LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222", align='right', nowrap="on"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222", nowrap="on"), LRS_location_repr, LRS_location_value)) - - if LRS_flag: - tr.append(TDCell(HT.TD(LRS_score_repr, Class="fs12 fwn b1 c222"), LRS_score_repr, LRS_score_value)) - tr.append(TDCell(HT.TD(LRS_location_repr, Class="fs12 fwn b1 c222"), LRS_location_repr, LRS_location_value)) - - - #XZ, 12/08/2008: generic correlation - repr='%3.3f' % thisTrait.corr - tr.append(TDCell(HT.TD(HT.Href(text=repr, url="javascript:showCorrPlot('%s', '%s')" % (formName, thisTrait.name), Class="fs12 fwn ffl"), Class="fs12 fwn ffl b1 c222", align='right'),repr,abs(thisTrait.corr))) - - #XZ, 12/08/2008: number of overlaped cases - repr = '%d' % thisTrait.nOverlap - tr.append(TDCell(HT.TD(repr, Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.nOverlap)) - - #XZ, 12/08/2008: p value of genetic correlation - repr = webqtlUtil.SciFloat(thisTrait.corrPValue) - tr.append(TDCell(HT.TD(repr,nowrap='ON', Class="fs12 fwn ffl b1 c222", align='right'),repr,thisTrait.corrPValue)) - - #XZ, 12/08/2008: literature correlation - LCorr = 0.0 - LCorrStr = "--" - if hasattr(thisTrait, 'LCorr') and thisTrait.LCorr: - LCorr = thisTrait.LCorr - LCorrStr = "%2.3f" % thisTrait.LCorr - tr.append(TDCell(HT.TD(LCorrStr, Class="fs12 fwn b1 c222", align='right'), LCorrStr, abs(LCorr))) - - #XZ, 09/22/2008: tissue correlation. - TCorr = 0.0 - TCorrStr = "--" - #XZ, 11/20/2008: need to pass two geneids: input_trait_mouse_geneid and thisTrait.mouse_geneid - if hasattr(thisTrait, 'tissueCorr') and thisTrait.tissueCorr: - TCorr = thisTrait.tissueCorr - TCorrStr = "%2.3f" % thisTrait.tissueCorr - # NL, 07/19/2010: add a new parameter rankOrder for js function 'showTissueCorrPlot' - rankOrder = self.RANK_ORDERS[self.method] - TCorrPlotURL = "javascript:showTissueCorrPlot('%s','%s','%s',%d)" %(formName, primaryTrait.symbol, thisTrait.symbol,rankOrder) - tr.append(TDCell(HT.TD(HT.Href(text=TCorrStr, url=TCorrPlotURL, Class="fs12 fwn ff1"), Class="fs12 fwn ff1 b1 c222", align='right'), TCorrStr, abs(TCorr))) - else: - tr.append(TDCell(HT.TD(TCorrStr, Class="fs12 fwn b1 c222", align='right'), TCorrStr, abs(TCorr))) - - #XZ, 12/08/2008: p value of tissue correlation - TPValue = 1.0 - TPValueStr = "--" - if hasattr(thisTrait, 'tissueCorr') and thisTrait.tissuePValue: #XZ, 09/22/2008: thisTrait.tissuePValue can't be used here because it could be 0 - TPValue = thisTrait.tissuePValue - TPValueStr = "%2.3f" % thisTrait.tissuePValue - tr.append(TDCell(HT.TD(TPValueStr, Class="fs12 fwn b1 c222", align='right'), TPValueStr, TPValue)) - - tblobj_body.append(tr) - - for ncol, item in enumerate([thisTrait.name, thisTrait.geneid, thisTrait.homologeneid, thisTrait.symbol, thisTrait.description, trait_location_repr, mean, LRS_score_repr, LRS_location_repr, thisTrait.corr, thisTrait.nOverlap, thisTrait.corrPValue, LCorr, TCorr, TPValue]): - worksheet.write([newrow, ncol], item) - - newrow += 1 - - return tblobj_body, worksheet, corrScript +def normalize_values(values_1, values_2): + N = min(len(values_1), len(values_2)) + X = [] + Y = [] + for i in range(N): + if values_1[i]!= None and values_2[i]!= None: + X.append(values_1[i]) + Y.append(values_2[i]) + + return (X, Y) -def calCorrelation(values_1, values_2): +def cal_correlation(values_1, values_2): N = min(len(values_1), len(values_2)) X = [] Y = [] diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index 40c14aaa..be750a0c 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -1,1358 +1,53 @@ - {% extends "base.html" %} - {% block title %}Correlation{% endblock %} - {% block content %} - - - - - - - - -

Correlation Table

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Values of Record 1436869_at in the Hippocampus Consortium M430v2 (Jun06) PDNN database were compared to all 45101 records in the Hippocampus Consortium M430v2 (Jun06) PDNN database. The top 500 correlations ranked by the Genetic Correlation (Pearson's r) are displayed. You can resort this list using the small arrowheads in the top row.
Click the correlation values to generate scatter plots. Select the Record ID to open the Trait Data and Analysis form. Select the symbol to open NCBI Entrez.



Select AllSelect NoneInvert SelectionAdd To Collection   Gene WeaverGCATGene Set Analysis Toolkit
 SelectDeselect Invert   AddGene Weaver  GCATGene Set
Network GraphsCorrelation Matrix and PCAPartial CorrelationCompare CorrelatesMultiple Interval MappingQTL Heat Map and Clustering
 Graph Matrix PartialCompareQTL MapHeat Map
 
-
      -
 
 
Record
ID
sortup.gifsortdown.gif
Symbol

sortup.gifsortdown.gif
Description

sortup.gifsortdown.gif
Location
Chr and Mb
sortup.gifsortdown.gif
Mean
Expr
sortup.gifsortdown.gif
Max
LRS
sortup.gifsortdown.gif
Max LRS Location
Chr and Mb
sortup.gifsortdown.gif
Sample
r ?
sortup.gifsortdown.gif
N
Cases
sortup.gifsortdown.gif
Sample
p(r) ?
sortupon.gifsortdown.gif
Lit
Corr ?
sortup.gifsortdown.gif
Tissue
r ?
sortup.gifsortdown.gif
Tissue
p(r) ?
sortup.gifsortdown.gif
1 -1436869_atShhsonic hedgehog; mid distal 3' UTRChr5: 28.7836959.27912.8Chr1: 193.7319961.000710.00e+001.0001.000--
2 -1434987_atAldh2aldehyde dehydrogenase 2, mitochondrial; last two exonsChr5: 122.0180108.45311.7Chr3: 10.3271010.973710.00e+000.2370.1440.483
3 -1420293_atAytl2acyltransferase like 2; distal 3' UTRChr13: 73.6536748.49212.2Chr15: 13.1492480.959710.00e+000.2270.1640.424
4 -1459623_atA630075K04RikA630075K04Rik; intergenic sequenceChr2: 156.6281617.41813.3Chr15: 13.1492480.958710.00e+00---0.1900.352
5 -1449781_atAA517650AA517650Chr13: 47.4548377.33111.4Chr15: 13.1492480.955710.00e+00------
6 -1434988_x_atAldh2aldehyde dehydrogenase 2, mitochondrialChr5: 122.0177609.37211.9Chr3: 10.3271010.954710.00e+000.2370.1440.483
7 -1458048_at6720406K03hypothetical protein 6720406K03Chr11: 31.9955367.82511.3ChrX: 112.6373530.954710.00e+00------
8 -1455707_atC130037N17RikRIKEN cDNA C130037N17 geneChr15: 8.9964908.98811.3Chr1: 193.7319960.953710.00e+00------
9 -1454251_at1700034J04RikRIKEN cDNA 1700034J04 geneChr12: 11.2283148.10811.5ChrX: 112.6373530.951710.00e+00------
10 -1415862_atTyrp1tyrosinase-related protein 1 (brown locus, glaucoma-associated pigment epithelium catalase); last three exons and proximal 3' UTRChr4: 80.4925607.42414.3Chr15: 13.1492480.948710.00e+000.310-0.0270.894
11 -1444984_atC77649expressed sequence C77649Chr13: 32.8335027.00311.1ChrX: 112.6373530.946710.00e+00------
12 -1457982_at1700052M18RikRIKEN cDNA 1700052M18 gene; non-coding, well expressed (hippocampus) antisense sequence in promoter of Gm1564Chr11: 102.5263369.59410.4ChrX: 112.6373530.945710.00e+00------
13 -1421186_atCcr2chemokine (C-C motif) receptor 2Chr9: 124.0226997.93911.6ChrX: 112.6373530.944710.00e+000.327-0.1210.555
14 -1438549_a_atSrrserine racemase; last exonChr11: 74.72174110.78812.0ChrX: 112.6373530.944710.00e+000.331-0.1130.582
15 -1420604_atHesx1homeo box gene expressed in ES cellsChr14: 27.8146167.07313.9Chr15: 13.1492480.943710.00e+000.471-0.1230.549
16 -1444022_at1110054O05RikRIKEN cDNA 1110054O05; putative exon (from ESTs)Chr14: 14.9437319.38012.0Chr15: 13.1492480.943710.00e+000.1560.4440.023
17 -1439404_x_atZfxzinc finger protein X-linked; exons 5 and 6ChrX: 91.3261099.49912.1ChrX: 112.6373530.942710.00e+000.3170.1000.626
18 -1420228_atAsh2lash2 (absent, small, or homeotic) 2-like (histone H3-K4 methylation); antisense in distal 3' UTRChr8: 26.9274318.34913.8Chr3: 10.3271010.942710.00e+000.227-0.2140.294
19 -1424612_at9330161F08RikRIKEN cDNA 9330161F08 geneChr15: 34.5047228.35611.9Chr1: 193.7319960.942710.00e+000.145----
20 -1425815_a_atHmmrhyaluronan mediated motility receptor (RHAMM)Chr11: 40.5149738.4759.8Chr19: 53.4592780.941710.00e+000.374-0.2220.277
21 -1421367_at4930549C01RikRIKEN cDNA 4930549C01 geneChr4: 136.1664397.94411.6Chr15: 13.1492480.941710.00e+00---0.1210.558
22 -1431884_at1110019B22RikRIKEN cDNA 1110019B22 geneChr10: 95.1811607.74212.1Chr15: 13.1492480.939710.00e+00---0.0890.666
23 -1432588_at5830468K08RikRIKEN cDNA 5830468K08 thymus EST; 3' end of non-coding sequence from AK018043 (antisense in last intron of Ythdf3)Chr3: 16.1104157.42013.2Chr15: 13.1492480.938710.00e+00------
24 -1444789_atF730011B02ESTsChr15: 99.0800207.96711.1Chr15: 13.1492480.938710.00e+00------
25 -1441483_atSlitrk2Slitrk2 SLIT and NTRK-like family, member 2ChrX: 63.9086796.88412.1Chr1: 193.7319960.937710.00e+000.226-0.1180.566
26 -1424148_a_atStap2signal transducing adaptor family member 2; exonChr17: 56.1372038.39811.5ChrX: 112.6373530.937710.00e+000.231-0.1220.554
27 -1454463_at3110047M22RikRIKEN cDNA 3110047M22 geneChr6: 50.5177278.09110.9Chr3: 149.0891950.937710.00e+00------
28 -1442589_atTrpc5transient receptor potential cation channel, subfamily C, member 5ChrX: 140.8524326.56111.7Chr3: 10.3271010.936710.00e+000.222-0.1070.604
29 -1430193_at5730505K17RikRIKEN cDNA 5730505K17 geneChr2: 118.9232287.60613.6Chr15: 13.1492480.935710.00e+00------
30 -1435040_atIrak3interleukin-1 receptor-associated kinase 3Chr10: 119.5789866.39812.9Chr1: 193.7319960.934710.00e+000.236-0.1710.403
31 -1456734_atTxn1thioredoxin 1; last exon and proximal 3' UTRChr4: 57.9566337.38814.7Chr3: 10.3271010.933710.00e+000.314-0.1970.336
32 -1420143_atD930043C02Rikmembrane-associated nucleic acid binding proteinChr6: 116.1023018.4989.2Chr15: 13.1492480.932710.00e+000.188----
33 -1446615_atD230007K08RikRIKEN cDNA D230007K08ChrX: 6.1146458.82211.5Chr15: 13.1492480.932710.00e+000.222----
34 -1451774_atOTTMUSG00000002196similar to keratin associated protein 4-10 (predicted gene, OTTMUSG00000002196) protein coding; keratin associated protein 4.10Chr11: 99.7120428.05112.5Chr15: 13.1492480.932710.00e+00---0.1170.568
35 -1443940_atLrrc22leucine rich repeat containing 22 (retina-restricted expression, 10q23.1, candidate retinopathy gene); mid 3' UTRChr14: 37.8863327.90212.4ChrX: 112.6373530.931710.00e+00------
36 -1447191_at5430401O09RikESTs, Weakly similar to RIKEN cDNA 5730493B19 [] [M.musculus]Chr15: 99.9730238.14110.6Chr1: 193.7319960.931710.00e+00------
37 -1457757_atLOC269389embyronic retinal HMG-box protein; exons 6, 7, and 8Chr2: 163.0735317.14012.1Chr3: 149.0891950.930710.00e+000.239----
38 -1441289_atC1orf54human chromosome 1 open reading frame 54Chr3: 95.6971958.08011.6Chr15: 13.1492480.930710.00e+00------
39 -1456561_s_atZfp393zinc finger protein 393Chr4: 117.4292807.21917.2Chr15: 13.1492480.930710.00e+000.256----
40 -1425493_atBmpr1abone morphogenetic protein receptor, type 1A; mid proximal 3' UTRChr14: 35.2268039.98411.0ChrX: 112.6373530.929710.00e+000.4300.1040.613
41 -1452986_atHgdhomogentisate 1, 2-dioxygenaseChr16: 37.6317617.26813.3Chr3: 10.3271010.929710.00e+000.258-0.0790.701
42 -1447563_atRps3ribosomal protein S3; last exon and 3' UTRChr7: 106.6273988.14110.0Chr2: 178.9427860.928710.00e+000.284-0.1130.581
43 -1436749_atMesdc2mesoderm development candiate 2Chr7: 91.0482808.40810.6Chr15: 13.1492480.928710.00e+000.253-0.1460.477
44 -1439245_atTnrc6trinucleotide repeat containing 6a; 3' UTRChr7: 130.3367897.82814.2Chr3: 10.3271010.927710.00e+000.292----
45 -1450353_atSlc25a17peroxisomal integral membrane protein 47; mid 3' UTRChr15: 81.1496437.94617.2Chr15: 13.1492480.927710.00e+000.158-0.0440.831
46 -1446656_atAngpt1angiopoietin 1Chr15: 42.4521598.05812.9ChrX: 127.0092660.927710.00e+000.334-0.0960.642
47 -1458006_atCpmcarboxypeptidase M; intron or possible short form 3' UTR from EST AK087584Chr10: 117.1137238.20916.6Chr15: 13.1492480.927710.00e+000.3230.3600.071
48 -1441754_atBB807463BB807463; antisense in BB807463Chr19: 6.9162227.90211.9ChrX: 112.6373530.927710.00e+00------
49 -1432266_at9430077C05RikRIKEN cDNA 9430077C05 geneChr2: 20.7053487.10314.1ChrX: 112.6373530.926710.00e+000.364----
50 -1451950_a_atCd80CD80 antigenChr16: 38.4739797.83611.3Chr15: 13.1492480.925710.00e+000.251-0.1190.564
51 -1448011_atVps13cvacuolar protein sorting 13C; antisense in 3' UTRChr9: 67.8434778.47210.5ChrX: 112.6373530.925710.00e+000.207-0.2320.254
52 -1419992_x_at1200014K04RikRIKEN cDNA 1200014K04 geneChr19: 10.1123217.28213.0Chr3: 10.3271010.924710.00e+00------
53 -1459589_atCryl1crystallin, lamda 1 (L-gulonate 3-dehydrogenase); last exon and proximal 3' UTRChr14: 57.89387210.40011.3Chr19: 53.4592780.923710.00e+000.2070.0010.996
54 -1443421_s_atPcdhb15protocadherin beta 15Chr18: 37.6355307.83412.1Chr2: 179.2574630.923710.00e+000.174-0.1550.451
55 -1444961_atC230066G23RikRIKEN cDNA C230066G23 geneChr4: 24.2977217.37713.8Chr15: 13.1492480.923710.00e+00---0.2580.203
56 -1422086_atTbx19T-box 19Chr1: 167.0684167.55219.8Chr15: 13.1492480.923710.00e+000.299-0.1920.346
57 -1433260_atPick1protein interacting with C kinase 1 (protein kinase C, alpha binding protein); 5' UTR of Pick1 (from AK015656)Chr15: 79.0589856.87610.7Chr15: 13.1492480.923710.00e+000.266-0.1680.411
58 -1429895_at2310010G23RikRIKEN cDNA 2310010G23 geneChrX: 34.3548777.95111.7Chr15: 13.1492480.922710.00e+00------
59 -1458550_atMyo1dmyosin IDChr11: 80.4870137.42211.8Chr1: 193.7319960.922710.00e+000.2260.0900.661
60 -1453673_atAtp13a4ATPase type 13A4Chr16: 29.5405457.63110.8Chr15: 13.1492480.922710.00e+000.2050.0690.738
61 -1433809_atDdx5DEAD (Asp-Glu-Ala-Asp) box polypeptide 5; last exon and 3' UTRChr11: 106.5980158.07613.9Chr15: 13.1492480.922710.00e+00--0.2280.263
62 -1431063_at5830445O15RikRIKEN cDNA 5830445O15 geneChr2: 120.2864967.79413.4ChrX: 112.6373530.921710.00e+00------
63 -1444682_atOxct3-oxoacid CoA transferase 1 (succinyl-CoA:3-ketoacid-coenzyme A transferase 1, mitochondrial precursor); distal 3' UTRChr15: 3.9750217.36113.2Chr3: 10.3271010.921710.00e+00------
64 -1459654_atGlceglucocorticoid induced transcript 1Chr9: 61.9601297.82313.6Chr2: 179.2574630.921710.00e+000.2560.2330.251
65 -1445285_atC630041L24Riknon-coding RNA RIKEN cDNA C630041L24 exclusively hippocampus CA1 expressed; non-coding sequence antisense in Hrh3 promoterChr2: 179.8405037.76413.9Chr3: 10.3271010.921710.00e+000.193----
66 -1425038_atSlc22a19solute carrier family 22 (organic anion transporter), member 19Chr19: 7.7477046.91714.1Chr15: 13.1492480.920710.00e+000.194----
67 -1458634_atCd47CD47 antigen (Rh-related antigen, integrin-associated signal transducer); last intronChr16: 49.8566757.29913.4Chr2: 180.8255810.920710.00e+000.3220.1890.355
68 -1418065_atRag2recombination activating gene 2Chr2: 101.4708127.43113.3Chr3: 10.3271010.920710.00e+000.293-0.1270.538
69 -1451661_atAkap4A kinase (PRKA) anchor protein 4ChrX: 6.6547357.82013.2ChrX: 112.6373530.920710.00e+000.243-0.1140.580
70 -1431561_a_atDhx34DEAH (Asp-Glu-Ala-His) box polypeptide 34; last three exonsChr7: 16.7826528.50614.7Chr15: 13.1492480.919710.00e+000.2010.0460.823
71 -1435887_atSerpina11serine (or cysteine) proteinase inhibitor, clade A (alpha-1 antiproteinase, antitrypsin), member 11; last three exons and proximal 3' UTRChr12: 105.2184827.61712.0ChrX: 112.6373530.919710.00e+000.117-0.0980.634
72 -1440846_atScaiScai suppressor of cancer cell invasion; exons 7, 8, and 9Chr2: 38.9624588.58610.4Chr1: 193.7319960.919710.00e+000.191----
73 -1444938_at1700081L11RikRIKEN cDNA 1700081L11 geneChr17: 39.8399557.62210.4ChrX: 127.0092660.919710.00e+000.142-0.2960.142
74 -1440937_atD330023I21RikRIKEN cDNA D330023I21 geneChrX: 136.4790017.43111.7Chr3: 10.3271010.919710.00e+00------
75 -1430767_a_at4930543E12RikMus musculus transcribed sequencesChr7: 120.2437236.74810.9Chr15: 13.1492480.919710.00e+00---0.2510.217
76 -1458193_atFabp9fatty acid binding protein 9, testisChr3: 10.1800508.23710.3Chr3: 10.3271010.919710.00e+00---0.0920.656
77 -1439121_atH2-T17histocompatibility 2, T region locus 17Chr17: 36.1757438.95110.4ChrX: 112.6373530.918710.00e+00--0.0800.698
78 -1419991_at1200014K04RikRIKEN cDNA 1200014K04 gene--7.17012.7Chr3: 10.3271010.918710.00e+00------
79 -1419295_atCreb3l1cAMP responsive element binding protein 3-like; 3' UTRChr2: 91.8225458.65811.3Chr2: 179.2574630.918710.00e+000.283-0.0600.771
80 -1444697_at4732490B19RikRIKEN cDNA 4732490B19 geneChr11: 113.0646907.39513.9Chr3: 10.3271010.918710.00e+00------
81 -1459705_atOlfm3olfactomedin 3; intron (from BE980857)Chr3: 114.6479587.24511.6Chr2: 178.9427860.918710.00e+000.2100.0660.749
82 -1448092_x_atSerpina4-ps1clone IMAGE:4194299, mRNAChr12: 105.3248227.62611.8ChrX: 127.0092660.918710.00e+000.311-0.0160.938
83 -1433302_atCdh10cadherin 10Chr15: 18.7488218.19910.9ChrX: 112.6373530.918710.00e+000.2820.2670.187
84 -1431904_at4933427G17RikRIKEN cDNA 4933427G17 geneChr7: 128.1424157.49910.0Chr15: 13.1492480.917710.00e+00---0.1180.566
85 -1441303_at0610031G08RikESTsChr2: 144.3814657.04612.4ChrX: 112.6373530.917710.00e+00---0.0340.869
86 -1427807_at4930448N21RikRIKEN cDNA 4930448N21 geneChr8: 48.5002747.73611.2Chr3: 10.3271010.917710.00e+00--0.0610.767
87 -1444479_at5730437C12RikRIKEN cDNA 5730437C12 geneChr9: 76.3360056.85514.3Chr15: 13.1492480.916710.00e+00------
88 -1458835_atA330033J07RikRIKEN cDNA A330033J07 geneChr12: 32.4702098.13310.8Chr15: 13.1492480.916710.00e+00------
89 -1438825_atCalm3calmodulin 3; poor probe set specificity (3' UTR)Chr7: 17.5020746.87814.5Chr3: 10.3271010.916710.00e+00--0.1290.531
90 -1439055_at2210409E12RikRIKEN cDNA 2210409E12 geneChr11: 88.8260527.28214.5Chr15: 13.1492480.916710.00e+00---0.2480.221
91 -1457171_atC530043A13RikRIKEN cDNA C530043A13 geneChr7: 86.6631528.4809.4Chr3: 10.3271010.915710.00e+00---0.0070.974
92 -1451856_atTnrc15trinucleotide repeat containing 15Chr1: 89.2250468.50110.9ChrX: 127.0092660.915710.00e+000.238-0.0350.864
93 -1454460_atSerpine2serpin peptidase inhibitor, clade E (plasminogen activator inhibitor type 1), member 2 (protease nexin 1)Chr1: 79.8355407.49410.5Chr15: 13.1492480.915710.00e+000.4010.0230.910
94 -1425999_atCfhl1complement component factor h-related like 1; locally repetitive 5' elementChr1: 141.5943627.16211.1ChrX: 112.6373530.915710.00e+000.194----
95 -1430963_atGcnt3glucosaminyl (N-acetyl) transferase 3, mucin typeChr9: 69.8810297.07211.9Chr15: 13.1492480.915710.00e+000.289-0.0500.809
96 -1431347_atD730039F16RikRIKEN cDNA D730039F16 geneChr2: 34.7273836.95313.5ChrX: 112.6373530.914710.00e+00--0.0400.848
97 -1446038_atXtrp3s1extra-toes spottingChr9: 123.5868918.42811.1ChrX: 112.6373530.914710.00e+00---0.1410.493
98 -1436288_at1700049M11RikRIKEN cDNA 1700049M11 geneChr2: 164.0246389.14110.4ChrX: 112.6373530.914710.00e+00------
99 -1457608_atGimap8GTPase, IMAP family member 8Chr6: 48.6055168.97010.1Chr1: 55.1843220.914710.00e+000.241-0.0300.886
100 -1458329_x_at1110051B16RikRIKEN cDNA 1110051B16 gene--6.84212.7ChrX: 112.6373530.914710.00e+000.255-0.1810.376
101 -1458270_atKcnb1potassium voltage gated channel, Shab-related subfamily, member 1; 5' UTRChr2: 167.0157567.24914.4Chr15: 13.1492480.913710.00e+000.3010.2180.285
102 -1438387_x_atTop3btopoisomerase (DNA) III betaChr16: 16.8929698.20913.8Chr3: 10.3271010.913710.00e+000.248-0.1180.567
103 -1452568_atWbscr17Williams-Beuren syndrome chromosome region 17 homolog (human)Chr5: 131.4350046.64412.2ChrX: 112.6373530.913710.00e+000.2040.0260.900
104 -1453264_atMarveld3MARVEL (membrane-associating) domain containing 3Chr8: 112.4769226.88510.4Chr3: 10.3271010.913710.00e+00--0.0680.740
105 -1459301_atMrg1myeloid ecotropic viral integration site-related gene 1Chr2: 115.8611688.78710.6ChrX: 127.0092660.913710.00e+000.4780.3870.051
106 -1422370_atOlfr49olfactory receptor 49Chr14: 54.9017316.73810.6Chr2: 66.8059560.913710.00e+00---0.2640.192
107 -1460105_atB3galnt2UDP-GalNAc:betaGlcNAc beta 1,3-galactosaminyltransferase, polypeptide 2Chr13: 14.0689867.06111.7Chr2: 67.6943500.912710.00e+000.169-0.2440.230
108 -1450636_s_atAkp5alkaline phosphatase 5Chr1: 88.9833617.35211.1Chr2: 179.2574630.912710.00e+000.3910.2490.219
109 -1460067_atCcr2chemokine (C-C motif) receptor 2 (from EST AK046579); putative far 3' UTRChr9: 124.0277089.49611.2Chr2: 179.2574630.912710.00e+000.327-0.1210.555
110 -1442958_atMpgN-methylpurine-DNA glycosylaseChr13: 32.6248747.58215.0Chr15: 13.1492480.911710.00e+000.3040.3970.045
111 -1458881_atBC034664cDNA sequence BC034664Chr13: 36.2025377.94510.0Chr15: 13.1492480.911710.00e+000.178----
112 -1458350_atOpa3optic atrophy 3 (autosomal recessive, with chorea and spastic paraplegia); antisense in far 3' end of Opa3 UTRChr7: 19.8331337.96413.2Chr15: 13.1492480.911710.00e+000.219-0.1930.345
113 -1428965_at1700007N18RikRIKEN cDNA 1700007N18 geneChr16: 50.5902007.27315.5Chr15: 13.1492480.911710.00e+000.221----
114 -1433318_at5430416B10RikRIKEN cDNA 5430416B10 geneChr2: 113.4123126.9149.2Chr19: 53.4592780.910710.00e+00------
115 -1446413_at4930430F21RikRIKEN cDNA 4930430F21 geneChr10: 86.7606047.71614.4Chr15: 13.1492480.910710.00e+00------
116 -1459427_atStamsignal transducing adaptor molecule (SH3 domain and ITAM motif) 1Chr2: 14.0638067.76415.1Chr15: 13.1492480.910710.00e+000.2420.1000.626
117 -1454355_at1810021M19RikRIKEN cDNA 1810021M19 geneChr2: 165.7131417.56410.1Chr15: 13.1492480.910710.00e+00------
118 -1458804_atSlco4c1solute carrier organic anion transporter family, member 4C1Chr1: 98.7423007.60211.1Chr2: 179.2574630.910710.00e+000.139-0.1830.371
119 -1459383_atBC049807cDNA sequence BC049807Chr17: 21.7055878.04212.6ChrX: 112.6373530.910710.00e+00---0.1120.587
120 -1437703_atFbxw14F-box and WD-40 domain protein 14 (F-box protein EG382156); last two exonsChr9: 109.2813869.24410.2Chr2: 179.2574630.910710.00e+00---0.1570.443
121 -1458191_atFoxp2forkhead box P2 (neocortex layer 6 signature gene); last exon and 3' UTR of short formChr6: 15.3613978.24210.8ChrX: 112.6373530.909710.00e+000.4040.3970.045
122 -1441042_atFgf1fibroblast growth factor 1Chr18: 39.0063467.65711.6ChrX: 112.6373530.909710.00e+000.4400.2150.292
123 -1419090_x_atKlk26kallikrein 26Chr7: 51.2713477.37114.5ChrX: 112.6373530.909710.00e+000.255----
124 -1429672_atArhgap15Rho GTPase activating protein 15Chr2: 43.8503617.30213.6Chr3: 10.3271010.909710.00e+000.220-0.2090.306
125 -1440733_at1200009O22RikRIKEN cDNA 1200009O22 geneChr6: 53.8177639.17511.2ChrX: 112.6373530.909710.00e+00--0.4240.031
126 -1450797_a_atCbx1chromobox homolog 1 (Drosophila HP1 beta)Chr11: 96.6695167.51312.7Chr15: 13.1492480.908710.00e+000.243-0.2150.292
127 -1442976_atC81072expressed sequence C81072Chr3: 75.2570167.08212.1Chr3: 10.3271010.908710.00e+00------
128 -1441116_atAU024404AU024404 EST; antisense in first intron of PamChr1: 99.9567387.84413.5Chr19: 53.4592780.908710.00e+00------
129 -1444023_atAnk2ankyrin 2, brainChr3: 126.7278066.86711.0Chr3: 10.3271010.908710.00e+000.2910.0280.892
130 -1444238_atD030060M11RikESTsChr6: 134.5553048.12012.5Chr15: 13.1492480.908710.00e+00------
131 -1459529_atE230016K23RikRIKEN cDNA E230016K23 geneChr11: 83.4369296.97112.8Chr3: 10.3271010.908710.00e+00---0.4150.035
132 -1457950_atLOC193217ESTsChr5: 82.2432426.61211.7Chr3: 10.3271010.908710.00e+00------
133 -1444888_atAU022852expressed sequence AU022852Chr15: 9.7364237.51112.2Chr2: 66.8059560.907710.00e+00------
134 -1432974_at4933421H12RikRIKEN cDNA 4933421H12 geneChr19: 37.5408727.31214.8Chr15: 13.1492480.907710.00e+00------
135 -1421983_s_atHnf4ahepatic nuclear factor 4, alpha; mid 3' UTRChr2: 163.3968637.33915.1Chr15: 13.1492480.907710.00e+000.293-0.0750.715
136 -1449317_atCflarCASP8 and FADD-like apoptosis regulatorChr1: 58.7895837.98812.1ChrX: 112.6373530.907710.00e+000.324-0.0820.692
137 -1432345_at1700048F04RikRIKEN cDNA 1700048F04 geneChr5: 126.4325018.96110.1ChrX: 112.6373530.907710.00e+00---0.1060.605
138 -1419836_atAU040583AU040583 EST; well expressed sequence (putative non-coding)Chr17: 46.2334799.68310.1Chr15: 13.1492480.907710.00e+00------
139 -1437867_atC330014B19RikRIKEN cDNA C330014B19 geneChr13: 3.3953307.57112.1Chr3: 10.3271010.906710.00e+00------
140 -1456578_x_atLasp1LIM and SH3 protein 1; distal 3' UTRChr11: 97.6999858.91310.1Chr3: 10.3271010.906710.00e+000.3360.2130.297
141 -1415851_a_atImpdh2inosine 5'-phosphate dehydrogenase 2Chr9: 108.46767611.56911.4Chr15: 13.1492480.906710.00e+000.282-0.3070.127
142 -1458923_atC230066G23RikESTsChr14: 65.0375067.26411.7Chr14: 118.8616530.906710.00e+00---0.2580.203
143 -1415809_atTpbpatrophoblast specific protein alphaChr13: 61.0399316.59016.8Chr3: 10.3271010.906710.00e+000.335-0.3010.135
144 -1439745_atCacng7calcium channel, voltage-dependent, gamma subunit 7 (type II transmembrane AMPA receptor regulatory protein); mid distal 3' UTRChr6: 91.98449010.34210.3Chr2: 65.7042710.905710.00e+000.1830.1120.586
145 -1446874_at0610009O04RikAU040128 EST; non-codingChr15: 31.7750788.4099.7Chr15: 13.1492480.905710.00e+00------
146 -1445019_atCcr2chemokine (C-C motif) receptor 2Chr1: 175.5375346.9629.2Chr5: 84.2651080.905710.00e+000.327-0.1210.555
147 -1441250_at9330101J02RikESTsChr15: 80.2822657.45813.1ChrX: 112.6373530.905710.00e+00---0.1350.510
148 -1445482_atGle1lGLE1 RNA export mediator-like (yeastChr2: 29.8154148.12711.7ChrX: 112.6373530.905710.00e+000.253-0.0430.834
149 -1421521_at4930430A15RikRIKEN cDNA 4930430A15 geneChr2: 111.0334536.49311.6Chr15: 13.1492480.905710.00e+00---0.1580.440
150 -1454005_atFmo2flavin containing monooxygenase 2; intron 3 or rare short form 3' UTRChr1: 164.8183127.68810.4ChrX: 112.6373530.904710.00e+000.2480.3180.114
151 -1442762_atBC042775RIKEN cDNA 4930595O22 geneChr4: 155.4177417.57513.9Chr15: 13.1492480.904710.00e+000.243----
152 -1419970_atSlc35a5solute carrier family 35, member A5 (probable UDP-sugar transporter protein); mid 3' UTRChr16: 45.1417557.25812.2ChrX: 127.0092660.904710.00e+00--0.3870.051
153 -1446710_atGtf2a2general transcription factor II A, 2Chr9: 69.8726567.03914.6Chr15: 13.1492480.904710.00e+000.177-0.1980.332
154 -1459425_at9430089E08RikESTsChr6: 81.3952108.02711.7Chr2: 178.9427860.904710.00e+00------
155 -1444038_atAU015836expressed sequence AU015836ChrX: 91.2204687.26510.4Chr5: 128.2649740.904710.00e+00---0.4110.037
156 -1440683_atA930004D18RikRIKEN cDNA A930004D18 geneChr2: 17.9569916.74210.7ChrX: 112.6373530.904710.00e+00--0.0110.958
157 -1457394_at2900002K06RikRIKEN cDNA 2900002K06 geneChrX: 7.7236187.15511.6Chr15: 13.1492480.904710.00e+00------
158 -1459598_atHip1huntingtin interacting protein 1; highly expressed sequence in last intron of Hip1 (putative)Chr5: 135.8879289.71711.4Chr15: 13.1492480.903710.00e+000.3150.3310.099
159 -1447680_atCpd1cerebellar postnatal development protein 1--7.05712.7Chr15: 13.1492480.903710.00e+00------
160 -1454726_s_atPtpdc1protein tyrosine phosphatase domain containing 1; mid distal 3' UTRChr13: 48.6735529.90913.8Chr1: 193.7319960.903710.00e+000.1120.1980.331
161 -1447694_x_atNeo1neogenin; distal 3' UTRChr9: 58.7227458.1609.9Chr3: 10.3271010.903710.00e+000.3670.1720.400
162 -1422941_atWnt16wingless-related MMTV integration site 16Chr6: 22.2479847.24711.6ChrX: 112.6373530.903710.00e+000.334-0.3620.070
163 -1447157_atBB751612BB751612Chr14: 70.1452718.72110.8Chr15: 13.1492480.903710.00e+00------
164 -1427292_atIgl-V1immunoglobulin lambda chain, variable 1Chr16: 19.0633487.55710.3Chr19: 53.4592780.903710.00e+000.206-0.0930.651
165 -1425675_s_atCeacam1CEA-related cell adhesion molecule 1; mid proximal 3' UTRChr7: 26.2477747.82610.9Chr2: 179.2574630.903710.00e+000.383-0.1710.403
166 -1418751_atSitsucrase isomaltase, structuralChr4: 43.4952967.50412.1Chr15: 13.1492480.902710.00e+00------
167 -1421532_atLgr8leucine-rich repeat-containing G protein-coupled receptor 8Chr5: 150.8727717.87312.4ChrX: 132.1209310.902710.00e+000.337----
168 -1421078_atTcf23transcription factor 23Chr5: 31.2788518.45610.0ChrX: 112.6373530.902710.00e+000.221-0.0960.641
169 -1447247_atPpp1r9bprotein phosphatase 1, regulatory subunit 9B; far 3' UTRChr11: 94.8690328.68510.3Chr15: 13.1492480.902710.00e+000.2650.0550.790
170 -1440525_atAcvr2bactivin A receptor, type IIB; distal 3' UTRChr9: 119.3472318.17612.1Chr15: 13.1492480.902710.00e+000.3790.2160.290
171 -1444401_atC80913expressed sequence C80913Chr7: 38.7872258.73611.0ChrX: 112.6373530.902710.00e+000.262-0.0670.745
172 -1419481_atSellselectin, lymphocyteChr1: 166.0098168.31913.1Chr3: 10.3271010.902710.00e+000.288-0.1940.343
173 -1458930_at9030022M04ESTsChr9: 99.5222137.86710.0Chr15: 13.1492480.902710.00e+00------
174 -1425996_a_atSmarca3SWI/SNF related, matrix associated, actin dependent regulator of chromatin, subfamily a, member 3; last two exons of short form message (exons 20 and 21)Chr3: 20.0064959.92810.2Chr19: 53.4592780.901710.00e+000.290----
175 -1438302_atZfZhangfei HCF-binding transcription protein (zinc finger, DHHC domain containing 9, retinal bipolar cell expression signature); antisense in 3' UTR and last exonChr7: 97.5922037.30213.0ChrX: 112.6373530.901710.00e+000.239----
176 -1433074_atDlg2discs, large 2 (postsynaptic density protein 93 kD, chapsyn 110); intron 10 or 11Chr7: 99.2305896.92413.3Chr15: 13.1492480.901710.00e+000.2240.1950.340
177 -1449667_atBB255841BB255841Chr5: 105.2541557.23710.6Chr3: 10.3271010.901710.00e+00------
178 -1421782_a_atSmr2submaxillary gland androgen regulated protein 2Chr5: 88.5375727.32512.3ChrX: 112.6373530.901710.00e+000.285-0.1270.536
179 -1460732_a_atPplperiplakin; 3' UTRChr16: 5.0868467.96314.1Chr1: 193.7319960.901710.00e+000.2820.0660.748
180 -1425559_a_atSahSA rat hypertension-associated homologChr7: 126.9243737.51113.7Chr2: 66.8059560.901710.00e+000.243----
181 -1459688_at2610510H03RikESTsChr2: 128.1709307.24310.9ChrX: 112.6373530.900710.00e+00---0.0660.747
182 -1446564_atD18Ertd169eDNA segment, Chr 18, ERATO Doi 169, expressedChr18: 64.0699358.2409.9Chr2: 180.8255810.900710.00e+00------
183 -1458361_atDclre1cDNA cross-link repair 1C (Artemis protein, Athabascan-type severe combined immunodeficiency); distal half of 3' UTRChr2: 3.3766558.3319.9Chr3: 10.3271010.900710.00e+000.264-0.1490.468
184 -1425003_atUroc1urocanase domain containing 1; mid distal 3' UTRChr6: 90.31399411.2668.7Chr15: 13.1492480.900710.00e+000.172-0.0370.857
185 -1446034_atB430305I03RikESTsChr14: 40.2890008.11112.9ChrX: 127.0092660.900710.00e+00------
186 -1443081_atGata6GATA binding protein 6; intron (from EST AK053151)Chr18: 11.0570318.2589.0Chr2: 179.2574630.900710.00e+000.4320.3060.128
187 -1455926_atLsm6RIKEN cDNA 2410088K19 geneChr11: 77.9630608.33712.3Chr15: 12.9720880.899710.00e+00--0.0240.909
188 -1433057_atIgsf4dimmunoglobulin superfamily, member 4Chr16: 66.8928127.03915.8Chr3: 10.3271010.899710.00e+000.210----
189 -1447411_atUgdhUDP-glucose dehydrogenaseChr5: 65.8213867.45314.0Chr3: 10.3271010.899710.00e+000.312-0.1550.450
190 -1445614_at1190002B21RikESTs--7.59310.6Chr15: 13.1492480.899710.00e+00------
191 -1446198_atD130064H19RikRIKEN cDNA D130064H19 geneChr13: 112.4822526.87610.4Chr15: 13.1492480.899710.00e+00------
192 -1436097_x_atArhgap9Rho GTPase activating protein 9Chr10: 126.7659378.21412.3Chr15: 13.1492480.899710.00e+000.165-0.1870.360
193 -1453148_atSema3dsema domain, immunoglobulin domain (Ig), short basic domain, secreted, (semaphorin) 3D; distal end of last exon and proximal 3' UTR (transQTL on chr 4 in BXD Eye Data)Chr5: 12.5851927.30813.6Chr15: 3.2291280.899710.00e+000.214-0.2230.274
194 -1454575_atUtyubiquitously transcribed tetratricopeptide repeat gene, Y chromosomeChrY: 0.3778137.0858.8Chr15: 13.1492480.899710.00e+000.2730.0200.923
195 -1422400_a_atHemt1hematopoietic cell transcript 1Chr15: 74.6544267.27310.8Chr15: 13.1492480.899710.00e+000.146-0.1410.491
196 -1458279_atFoxo3forkhead box O3; intron 2 (from AK157015)Chr10: 41.9279927.62412.8Chr3: 10.3271010.899710.00e+000.325-0.2530.212
197 -1438638_x_atFam116bfamily with sequence similarity 116, member B (protein LOC414918); distal 3' UTRChr15: 89.01266611.4239.6Chr2: 179.2574630.898710.00e+00------
198 -1455457_atCyp2c54cytochrome P450, family 2, subfamily c, polypeptide 54 (xenobiotic metabolism, phenytoin, tolbutamide, ibuprofen, warfarin, similar to human CYP2C9 and CYP2C19, also see Cyp2c37 and Cyp2c50); putative 3' UTRChr19: 40.1126247.59410.4Chr15: 13.1492480.898710.00e+000.162-0.1100.591
199 -1418753_atGfpt2glutamine fructose-6-phosphate transaminase 2Chr11: 49.6515258.63013.2Chr3: 10.3271010.898710.00e+000.2750.1380.500
200 -1453829_at2310007J06RikRIKEN cDNA 2310007J06 geneChr14: 51.5096758.7179.8ChrX: 112.6373530.898710.00e+00------
201 -1419846_atTaf15TAF15 RNA polymerase II, TATA box binding protein (TBP)-associated factorChr11: 83.3141258.19011.4Chr3: 10.3271010.898710.00e+000.238-0.1390.499
202 -1439876_atVti1avesicle transport through interaction with t-SNAREs homolog 1A (yeast)Chr19: 55.5209808.46310.8Chr15: 13.1492480.898710.00e+000.149-0.0590.773
203 -1444616_x_at8430439C15RikRIKEN cDNA 8430439C15 geneChr13: 23.8152737.98713.2Chr3: 10.3271010.897710.00e+00------
204 -1443942_atGabpb2GA repeat binding protein, beta 2Chr3: 94.9980207.37311.2Chr2: 180.8255810.897710.00e+000.2630.1540.453
205 -1431889_x_atPsg21pregnancy-specific glycoprotein 21Chr7: 19.2322658.1209.4ChrX: 112.6373530.897710.00e+000.228-0.2490.221
206 -1458241_atHmga2high mobility group AT-hook 2Chr10: 119.8786947.49010.5Chr15: 13.1492480.897710.00e+000.419-0.2330.252
207 -1437786_atC80008expressed sequence C80008Chr5: 97.8112986.22316.0ChrX: 112.6373530.897710.00e+000.190----
208 -1449260_atRab3dRAB3D, member RAS oncogene familyChr9: 21.7112637.34215.4Chr15: 13.1492480.897710.00e+000.2780.0510.805
209 -1436068_atZbtb10zinc finger and BTB domain containing 10Chr3: 9.2816047.88013.1Chr15: 13.1492480.897710.00e+000.205-0.2800.165
210 -1430724_atAbhd9RIKEN cDNA 2310063B19 geneChr17: 32.3216558.13513.9ChrX: 112.6373530.896710.00e+000.192-0.1610.433
211 -1433097_at4930473M17RikRIKEN cDNA 4930473M17 gene--6.61210.7Chr15: 13.1492480.896710.00e+00------
212 -1459184_atBG071075BG071075Chr12: 49.9565906.67712.5Chr3: 10.3271010.896710.00e+00------
213 -1443193_atTtll1tubulin tyrosine ligase-like 1Chr15: 83.3224087.71314.0ChrX: 112.6373530.895710.00e+000.1620.0270.897
214 -1427800_atKrtap16-2keratin associated protein 16-2Chr16: 88.8693317.70213.5Chr15: 13.1492480.895710.00e+00------
215 -1431969_at4930402D18RikRIKEN cDNA 4930402D18 geneChr15: 60.7595166.56310.5Chr1: 55.1843220.895710.00e+00------
216 -1451880_atBC006743BC006743 protein; exon and 3' UTRChr14: 75.1524487.29313.1Chr3: 10.3271010.895710.00e+00------
217 -1435979_a_atE330039G21RikRIKEN cDNA E330039G21 geneChr11: 115.7479628.01212.2Chr15: 13.1492480.895710.00e+000.215----
218 -1454574_atFndc3bfibronectin type III domain containing 3BChr3: 27.3140777.32711.2Chr15: 13.1492480.895710.00e+000.2780.3380.092
219 -1437431_atCux1cut-like homeobox 1 (CCAAT displacement protein)Chr5: 136.9589937.32715.0Chr1: 193.7319960.894710.00e+000.436-0.1520.459
220 -1458587_at2310047D07RikRIKEN cDNA 2310047D07 geneChr5: 150.0488508.18812.9Chr15: 13.1492480.894710.00e+00---0.2830.161
221 -1457465_atTnks2tankyrase, TRF1-interacting ankyrin-related ADP-ribose polymerase 2ChrX: 6.2139668.51912.5ChrX: 127.0092660.894710.00e+000.2280.0540.795
222 -1444284_atA430106G13RikRIKEN cDNA A430106G13 geneChr1: 139.8273496.96313.1Chr3: 10.3271010.894710.00e+00--0.0140.946
223 -1427516_a_atBocbiregional cell adhesion molecule-related/down-regulated by oncogenes (Cdon) binding protein; last exonChr16: 44.4857279.5219.2Chr2: 179.2574630.894710.00e+000.499-0.2000.327
224 -1457043_atB3galtlbeta 1,3-galactosyltransferase-like; far 3' UTR (from AK083491)Chr5: 150.5646719.82214.2Chr1: 193.7319960.894710.00e+000.2630.1290.529
225 -1427497_at2610015P09RikRIKEN cDNA 2610015P09 geneChr16: 43.9639397.37616.3Chr2: 179.2574630.893710.00e+00------
226 -1420041_atSnrpbMus musculus transcribed sequence with strong similarity to protein pir:T08738 (H.sapiens) T08738 hypothetical protein DKFZp586E0518.1 - human (fragment)Chr12: 56.0092037.07912.9Chr1: 193.7319960.893710.00e+00---0.2070.309
227 -1425295_atEar11eosinophil-associated, ribonuclease A family, member 11Chr14: 51.8749717.34413.6ChrX: 112.6373530.893710.00e+000.1840.0070.974
228 -1432122_at4933403H06RikRIKEN cDNA 4933403G17 geneChr3: 154.8003628.02317.5ChrX: 112.6373530.893710.00e+00------
229 -1432306_atRapgef5Rap guanine nucleotide exchange factor (GEF) 5Chr12: 118.8416567.29410.7Chr15: 13.1492480.893710.00e+000.2090.2630.193
230 -1431919_atRttnrotatinChr18: 89.2836907.45911.5Chr15: 13.1492480.893710.00e+000.313-0.1390.497
231 -1425438_atSlc26a8clone MGC:36892 IMAGE:4935116, Ntrk2 intron--7.59412.8ChrX: 132.1209310.893710.00e+00---0.0810.696
232 -1446970_atGemin5gem (nuclear organelle) associated protein 5Chr11: 57.9362637.76610.8ChrX: 112.6373530.893710.00e+000.199-0.2400.237
233 -1437523_s_atSgcgsarcoglycan, gamma (35kDa dystrophin-associated glycoprotein, limb-girdle muscular dystrophy, type 2C); mid 3' UTRChr14: 61.8399166.5048.9Chr14: 120.0214650.893710.00e+000.243-0.2740.175
234 -1427787_atSp6trans-acting transcription factor 6Chr11: 96.8843417.18813.9Chr2: 179.2574630.893710.00e+000.358-0.0480.816
235 -1457967_atA030003K21RikRIKEN cDNA A030003K21; 3' end of ESTChr1: 82.9391938.08710.0Chr1: 193.7319960.892710.00e+00---0.1860.363
236 -1418609_atIl1f6interleukin 1 family, member 6Chr2: 24.0715776.90312.7ChrX: 112.6373530.892710.00e+000.239-0.2300.257
237 -1458857_atAV125803ESTsChr15: 36.8777478.28510.9Chr2: 67.6943500.892710.00e+00------
238 -1458681_atTm9sf3transmembrane protein 9 superfamily member 3; intron 1 (AW823484)Chr19: 41.3375809.12412.4Chr14: 118.8616530.892710.00e+000.204-0.3050.130
239 -1443354_atTrim59tripartite motif-containing 59Chr3: 68.8427208.08411.9Chr15: 13.1492480.892710.00e+000.202-0.3650.066
240 -1428925_atSenp1SUMO1/sentrin specific protease 1; putative far 3' UTRChr15: 97.8695527.64211.1Chr1: 193.7319960.892710.00e+000.243-0.0890.664
241 -1417074_atCeacam10CEA-related cell adhesion molecule 10Chr7: 25.5659217.06311.4ChrX: 112.6373530.891710.00e+000.2710.1180.564
242 -1444940_atC76411expressed sequence C76411Chr13: 6.2176117.80910.8Chr15: 13.1492480.891710.00e+00------
243 -1438865_atH13histocompatibility 13Chr2: 152.5338227.74912.8Chr15: 13.1492480.891710.00e+000.2850.1870.361
244 -1430627_atPak2p21 (CDKN1A)-activated kinase 2Chr16: 32.0332257.46113.8Chr15: 13.1492480.891710.00e+000.2880.1200.558
245 -1430540_at5330414O08RikRIKEN cDNA 5330414O08 geneChr7: 107.6885837.64010.4Chr3: 10.3271010.891710.00e+000.183----
246 -1443798_atPik3cdphosphatidylinositol 3-kinase catalytic delta polypeptide; distal 3' UTRChr4: 149.02328011.2499.8Chr2: 179.2574630.891710.00e+000.322-0.1980.333
247 -1432784_at4930556A17RikRIKEN cDNA 4930556A17 geneChr3: 37.7375836.99714.2Chr15: 13.1492480.891710.00e+00---0.2590.201
248 -1455427_atAngpt4angiopoietin 4Chr2: 151.7704997.79611.2Chr15: 13.1492480.891710.00e+000.2530.2670.187
249 -1432687_at4833406M21RikRIKEN cDNA 4833406M21 geneChr13: 110.4403516.45612.3Chr2: 65.7042710.891710.00e+00------
250 -1459943_atA030012G06RikRIKEN cDNA A030012G06 geneChr9: 97.1909116.52311.4Chr15: 13.1492480.891710.00e+00------
251 -1429636_at1700010D01RikRIKEN cDNA 1700010D01 geneChrX: 89.7354346.53615.9Chr15: 13.1492480.891710.00e+00---0.1550.449
252 -1459824_atSmarcc1SWI/SNF related, matrix associated, actin dependent regulator of chromatin, subfamily c, member 1Chr9: 110.1397607.34416.8Chr15: 13.1492480.891710.00e+000.2860.0860.678
253 -1436565_atCeacam10CEA-related cell adhesion molecule 10Chr7: 25.5691066.72612.1Chr15: 13.1492480.891710.00e+000.2710.1180.564
254 -1442459_atAdamts19a disintegrin-like and metalloprotease (reprolysin type) with thrombospondin type 1 motif, 19Chr18: 59.1509146.94314.1ChrX: 112.6373530.890710.00e+000.197-0.0910.660
255 -1450828_atSynpo2synaptopodin 2; exon 1 and intron 1 (transQTL on Chr 4 in BXD eye data)Chr3: 122.8151966.89211.8Chr15: 13.1492480.890710.00e+000.2690.1380.501
256 -1429126_atNudt5nudix (nucleoside diphosphate linked moiety X)-type motif 5Chr2: 5.7913858.92710.0Chr15: 13.1492480.890710.00e+000.1880.1340.514
257 -1444590_at2010305K11RikRIKEN cDNA 2010305K11 geneChr8: 26.8626606.26914.9Chr3: 10.3271010.890710.00e+000.216----
258 -1425229_a_atTcf7l2transcription factor 7-like 2, T-cell specific, HMG-box; 3' UTRChr19: 56.0062579.30911.6ChrX: 127.0092660.890710.00e+000.3130.1070.602
259 -1444229_atNr2f2nuclear receptor subfamily 2, group F, member 2; intron 2 (from EST AK135306)Chr7: 77.5006237.24011.6ChrX: 112.6373530.890710.00e+000.4470.4400.025
260 -1428518_atMlf1ipmyeloid leukemia factor 1 interacting proteinChr8: 47.6636077.48415.9Chr19: 53.4592780.890710.00e+000.245----
261 -1420701_atKlk1kallikrein 1Chr7: 51.2256246.91217.3Chr15: 13.1492480.890710.00e+000.262-0.2760.173
262 -1418705_atCrxcone-rod homeobox containing gene (rod-cone dystrophy 2); distal 3' UTRChr7: 16.4513156.78410.3Chr19: 53.4592780.890710.00e+000.3000.0400.848
263 -1444916_atGm1305gene model 1305; antisense in intronChr1: 181.6662086.73611.0Chr15: 13.1492480.890710.00e+00---0.1010.625
264 -1452564_atOfaoncofetal antigen--6.37014.8ChrX: 112.6373530.890710.00e+00------
265 -1445509_atAtf7activating transcription factor 7Chr15: 102.3785417.54114.9Chr1: 193.7319960.890710.00e+000.2530.0840.682
266 -1432338_at4833419O12RikRIKEN cDNA 4833419O12 geneChr16: 20.8005106.68911.3Chr2: 178.9427860.890710.00e+00------
267 -1418402_atAdam19a disintegrin and metalloproteinase domain 19 (meltrin beta, dendritic cell marker); distal 3' UTRChr11: 45.9602708.93911.3Chr1: 193.7319960.890710.00e+000.3050.0520.800
268 -1456432_atA630035D09RikRIKEN cDNA A630035D09 geneChr15: 80.4043746.80012.8Chr15: 13.1492480.889710.00e+00------
269 -1421130_atZfp111zinc finger protein 111Chr7: 24.9819697.38913.5Chr15: 13.1492480.889710.00e+000.2190.1120.586
270 -1453574_atHba-a1hemoglobin alpha, adult chain 1--7.29714.0ChrX: 112.6373530.889710.00e+000.246-0.0210.918
271 -1431546_at4930509K18RikRIKEN cDNA 4930509K18 geneChr4: 40.2641706.90013.6Chr15: 13.1492480.889710.00e+00------
272 -1452311_atDmgdhdimethylglycine dehydrogenase precursorChr13: 94.5222107.31413.0ChrX: 112.6373530.889710.00e+000.248-0.0850.679
273 -1447672_x_atAW539964hypothetical protein E130013P03Chr8: 108.4555419.2437.9ChrX: 112.6373530.889710.00e+00------
274 -1444809_atRnf181ESTs, Weakly similar to similar to RIKEN cDNA 1810006A16 gene [Homo sapiens] [H.sapiens]Chr8: 112.4839887.03310.2Chr1: 193.7319960.889710.00e+000.1510.4180.034
275 -1430768_atHoxa13homeo box A13Chr6: 52.2065226.28817.7Chr3: 10.3271010.889710.00e+000.4820.4020.042
276 -1450567_a_atCol2a1procollagen, type II, alpha 1Chr15: 97.8064718.51111.7Chr15: 13.1492480.888710.00e+000.393-0.1560.446
277 -1430461_at1700120E14RikRIKEN cDNA 1700120E14 geneChr18: 74.6555956.65215.8Chr15: 13.1492480.888710.00e+00------
278 -1443474_atD7Ertd495eDNA segment, Chr 7, ERATO Doi 495, expressedChr7: 132.6406747.2318.7ChrX: 112.6373530.888710.00e+00------
279 -1449242_s_atHrghistidine-rich glycoprotein; last exonChr16: 22.9612837.73411.0ChrX: 112.6373530.888710.00e+000.277-0.0960.640
280 -1417413_atCuzd1CUB and zona pellucida-like domains 1Chr7: 138.4520977.48811.3Chr3: 149.0891950.888710.00e+000.3060.1030.615
281 -1452493_s_atHoxb8homeo box B8Chr11: 96.1461107.4599.7Chr15: 13.1492480.888710.00e+000.4200.2490.221
282 -1432048_at6330565B04RikRIKEN cDNA 6330565B04 geneChr1: 28.6269317.67316.6Chr3: 10.3271010.888710.00e+00---0.0140.947
283 -1433091_at6430514K02RikRIKEN cDNA 6430514K02 geneChr8: 50.8242637.25614.6Chr1: 193.7319960.888710.00e+00------
284 -1457554_atApobapolipoprotein BChr12: 8.0116887.43011.1Chr2: 178.9427860.888710.00e+000.225-0.0110.958
285 -1433366_atMapk8mitogen activated protein kinase 8Chr14: 34.2344817.77612.9ChrX: 112.6373530.888710.00e+000.2950.0620.762
286 -1420231_atZfp59zinc finger protein 59--7.0039.6Chr3: 10.3271010.888710.00e+00---0.2500.218
287 -1454279_atOaz3ornithine decarboxylase antizyme 3Chr3: 94.2393267.34010.9ChrX: 112.6373530.887710.00e+000.234-0.0360.860
288 -1417828_atAqp8aquaporin 8Chr7: 130.6110067.65513.3Chr15: 13.1492480.887710.00e+000.272-0.1780.383
289 -1428020_at2310066N05RikRIKEN cDNA 2310066I18 geneChr3: 146.1696207.21216.2Chr3: 12.5656160.887710.00e+000.150----
290 -1421485_atTtbk2tau tubulin kinase 2; three exonsChr2: 120.5816197.96611.8Chr15: 13.1492480.887710.00e+000.166-0.1360.509
291 -1428378_atZc3hav1zinc finger CCCH type, antiviral 1Chr6: 38.2657537.89111.9Chr15: 13.1492480.887710.00e+000.236-0.0510.804
292 -1432204_at4930570B17RikRIKEN cDNA 4930570B17 geneChr15: 30.5280007.05412.4Chr9: 79.9914910.887710.00e+00------
293 -1421795_s_atKlrc3killer cell lectin-like receptor subfamily C, member 3Chr6: 129.5932127.33713.4Chr15: 13.1492480.887710.00e+000.140-0.1690.409
294 -1421513_atTdrd1tudor domain containing 1Chr19: 56.9388097.5678.9ChrX: 112.6373530.887710.00e+000.216-0.1440.483
295 -1428773_s_atBcorBcl6 interacting corepressorChrX: 11.6144848.57612.6ChrX: 127.0092660.886710.00e+000.310-0.0320.877
296 -1457126_atMyl4myosin, light polypeptide 4; alternate 3' UTRChr11: 104.4472227.94511.4Chr3: 10.3271010.886710.00e+000.3100.0250.905
297 -1440865_atIfitm6interferon induced transmembrane protein 6Chr7: 148.2016457.04615.9Chr15: 13.1492480.886710.00e+000.242-0.1630.426
298 -1441449_atKdm5clysine (K)-specific demethylase 5C (X-linked mental retardation, jumonji, AT rich interactive domain 1C); intron 9ChrX: 148.6809467.83111.0Chr15: 13.1492480.886710.00e+000.291----
299 -1451898_a_atSema6csema domain, transmembrane domain (TM), and cytoplasmic domain, (semaphorin) 6C; 3' UTRChr3: 94.97729110.20712.3Chr15: 13.1492480.886710.00e+000.242-0.0060.978
300 -1454215_atXrcc4X-ray repair complementing defective repair in Chinese hamster cells 4--6.61310.7Chr15: 13.1492480.886710.00e+000.258-0.1490.469
301 -1421775_atFcer1aFc receptor, IgE, high affinity I, alpha polypeptideChr1: 175.1515097.09916.3ChrX: 112.6373530.886710.00e+000.287-0.1450.481
302 -1446232_atLtbp1latent transforming growth factor beta binding protein 1Chr17: 75.7484657.67212.5ChrX: 112.6373530.886710.00e+000.3140.1610.432
303 -1421206_atLifleukemia inhibitory factorChr11: 4.1739427.05613.1Chr15: 13.1492480.886710.00e+000.374-0.1500.465
304 -1428480_atCdca8cell division cycle associated 8Chr4: 124.5961739.19412.4ChrX: 112.6373530.886710.00e+000.219-0.3840.053
305 -1423541_at4930511I11RikRIKEN cDNA 4930511I11 geneChr17: 28.6745236.8169.6Chr8: 74.9353530.886710.00e+00---0.1360.507
306 -1429811_atC12orf11sarcoma antigen NY-SAR-95, human chromosome 12 open reading frame 11; last 2 exons and last 2 intronsChr6: 146.4985027.86610.8Chr1: 193.7319960.886710.00e+000.237----
307 -1419923_atTrpm3transient receptor potential cation channel, subfamily M, member 3Chr19: 22.7997387.93214.1Chr15: 13.1492480.886710.00e+000.2090.1960.338
308 -1447778_x_atBrcc3BRCA1/BRCA2-containing complex, subunit 3; mid 3' UTRChrX: 72.6979989.61210.4ChrX: 112.6373530.886710.00e+000.234-0.0800.699
309 -1441357_atKirrel3kin of IRRE like 3 (Drosophila)Chr9: 34.3176147.76510.5Chr3: 12.5656160.886710.00e+000.250-0.0760.710
310 -1431740_atSlc7a13solute carrier family 7, (cationic amino acid transporter, y+ system) member 13 (kidney male-specific transporter); intron 1 (from AK002431, male kidney EST)Chr4: 19.7469346.9068.5ChrX: 112.6373530.885710.00e+000.156-0.1360.509
311 -1427825_atSlco1b2solute carrier organic anion transporter family, member 1b2; exon and intronChr6: 141.5966917.52413.7Chr15: 13.1492480.885710.00e+000.2420.0480.818
312 -1456254_atInpp4binositol polyphosphate-4-phosphatase, type 2; intron 5 or antisense in AK140223 ESTChr8: 84.3980988.09511.2ChrX: 112.6373530.885710.00e+000.232-0.1980.332
313 -1445470_atCol6a4procollagen type 6, alpha 4 (putative); 5' UTR and first exonChr9: 105.9735358.65412.3Chr15: 13.1492480.885710.00e+000.188----
314 -1439992_atMov10l1Moloney leukemia virus 10-like 1Chr9: 114.4673747.35519.0Chr15: 13.1492480.885710.00e+000.262-0.2300.257
315 -1430895_at2010109A12RikRIKEN cDNA 2010109A12 geneChr5: 93.6357146.57211.3ChrX: 112.6373530.885710.00e+00---0.3450.084
316 -1446422_atMarch5membrane-associated ring finger (C3HC4) 5, mitochondrial E3 ubiquitin-protein ligaseChr18: 56.9903586.95813.7Chr15: 13.1492480.885710.00e+000.174-0.0220.913
317 -1431476_at4933407I05RikRIKEN cDNA 4933407I05 geneChr9: 51.7235437.1829.6Chr15: 13.1492480.885710.00e+00------
318 -1442086_atMta3metastasis associated 3; intron 4Chr17: 84.1510688.25011.7Chr3: 10.3271010.885710.00e+000.2840.0350.864
319 -1456221_atMGC40768ESTs, Moderately similar to protein phosphatase 4, regulatory subunit 1 [Rattus norvegicus] [R.norvegicus]Chr8: 101.6716317.13911.3Chr2: 180.8255810.885710.00e+00------
320 -1459079_at5430405C01RikESTsChr5: 142.6934736.46611.6ChrX: 112.6373530.885710.00e+00------
321 -1444592_atOsgepO-sialoglycoprotein endopeptidaseChr14: 51.5342618.14210.3Chr15: 3.2291280.885710.00e+000.2620.0450.826
322 -1451750_atIrak4interleukin-1 receptor-associated kinase 4 (NF-kappaB activation); distal 3' UTRChr15: 94.3981787.40311.2Chr15: 13.1492480.885710.00e+000.2540.0150.942
323 -1442554_s_atKalrnkalirin, RhoGEF kinase (huntingtin-associated protein interacting protein duo); two central exonsChr16: 34.00995111.95711.4Chr15: 13.1492480.885710.00e+000.2840.0330.874
324 -1435958_atPigbphosphatidylinositol glycan, class BChr9: 72.8624778.14811.2Chr9: 79.9914910.884710.00e+000.2040.3000.137
325 -1425916_atCapn8calpain 8Chr1: 184.5358628.37713.3ChrX: 112.6373530.884710.00e+000.232-0.1370.504
326 -1445752_at9430089E08RikESTs, Highly similar to L1 repeat, Tf subfamily, member 18 [] [M.musculus]Chr9: 11.8952406.98415.1ChrX: 112.6373530.884710.00e+00------
327 -1446669_atEbf1early B-cell factor 1Chr11: 44.4775527.02710.4Chr9: 79.9914910.884710.00e+000.365-0.0670.746
328 -1459410_atAV304616AV304616 EST; well expressed (hippocampus) non-coding sequenceChr9: 23.6228269.62111.0Chr15: 13.1492480.884710.00e+00------
329 -1446116_atB230378P21RikRIKEN cDNA B230378P21 geneChr6: 32.4649956.73210.6Chr15: 13.1492480.884710.00e+00------
330 -1422313_a_atIgfbp5insulin-like growth factor binding protein 5 (hippocampal DG expression signature); mid 3' UTRChr1: 72.9072989.2869.5Chr2: 179.2574630.884710.00e+000.381-0.0050.980
331 -1446037_atC230081A13RikRIKEN cDNA C230081A13 geneChr9: 56.1304568.88811.7Chr2: 67.6943500.884710.00e+000.1650.1210.555
332 -1446408_atHoxa11homeo box A11Chr6: 52.1891846.43112.3ChrX: 132.1209310.883710.00e+000.4310.0650.751
333 -1446923_atC630010D02RikESTsChr16: 35.1097128.90712.7ChrX: 112.6373530.883710.00e+00------
334 -1459178_atD8Ertd503eDNA segment, Chr 8, ERATO Doi 503, expressedChr8: 120.4255737.06411.7Chr3: 10.3271010.883710.00e+00------
335 -1441786_at1700020L24RikESTsChr18: 5.5657808.0509.5ChrX: 112.6373530.883710.00e+00---0.2010.326
336 -1420470_atSult1c1sulfotransferase family, cytosolic, 1C, member 1Chr17: 54.1017366.97119.3Chr1: 172.9808200.883710.00e+000.255-0.2890.152
337 -1447531_x_atCugbp2CUG triplet repeat, RNA binding protein 2; putative intron 1Chr2: 6.7571679.23613.5Chr1: 193.7319960.883710.00e+000.2980.0180.929
338 -1443416_atC79741expressed sequence C79741; exon 3Chr13: 51.0425087.8119.7Chr3: 10.3271010.883710.00e+00------
339 -1448841_atPfplpore forming protein-likeChr19: 12.5062627.38214.3Chr3: 10.3271010.883710.00e+00---0.2150.292
340 -1423348_atFzd8frizzled homolog 8 (Drosophila)Chr18: 9.2150728.70713.8Chr15: 13.1492480.882710.00e+000.3230.3670.065
341 -1431868_atAkap4A kinase (PRKA) anchor protein 4Chr15: 39.9990767.64112.1Chr2: 66.8059560.882710.00e+000.243-0.1140.580
342 -1421788_x_atKlk26kallikrein 13; last three exonsChr7: 51.2717487.76212.0Chr15: 13.1492480.882710.00e+000.255----
343 -1434830_atMadMax dimerization proteinChr6: 86.5970839.16912.9Chr15: 13.1492480.882710.00e+000.303-0.2500.218
344 -1458094_at6130401L20Rik15 days embryo male testis cDNA, RIKEN full-length enriched library, clone:8030405F19 product:Y box protein 1, pseudogene 2, full insert sequence.Chr18: 84.6198376.99713.0Chr3: 10.3271010.882710.00e+00--0.0850.681
345 -1420183_atLorloricrin (keratinocyte); 3' UTRChr3: 91.88422411.83310.9ChrX: 112.6373530.882710.00e+000.386-0.1420.490
346 -1432758_at2900011L18RikRIKEN cDNA 2900011L18 geneChr13: 108.6066798.75612.4Chr2: 65.7042710.882710.00e+00--0.0700.736
347 -1453653_at6330576A10RikRIKEN cDNA 6330576A10 geneChr14: 100.8781546.75113.0Chr15: 13.1492480.882710.00e+00------
348 -1433124_at4930534I15RikRIKEN cDNA 4930534I15 geneChr2: 22.8823578.02112.2Chr2: 65.7042710.882710.00e+00------
349 -1457653_atA630042L21RikRIKEN cDNA A630042L21 geneChr18: 61.3345238.4409.6ChrX: 112.6373530.882710.00e+000.1760.1570.445
350 -1437836_x_at0610011L14RikRIKEN cDNA 0610011L14 geneChr2: 156.3937638.40410.9Chr1: 193.7319960.882710.00e+000.170-0.2770.171
351 -1430157_at1700095J03RikRIKEN cDNA 1700095J03 geneChr7: 116.5833577.28311.4Chr15: 13.1492480.881710.00e+00---0.1600.434
352 -1458027_atMrpl17mitochondrial ribosomal protein L17; distal 3' UTRChr7: 112.9518098.06113.5ChrX: 112.6373530.881710.00e+000.1710.1360.509
353 -1449621_s_atThsd1thrombospondin, type I, domain 1Chr8: 23.3699527.97517.5ChrX: 112.6373530.881710.00e+000.3010.1430.485
354 -1427584_atAmotangiomotinChrX: 141.8835427.80913.8Chr1: 193.7319960.881710.00e+000.346-0.2910.149
355 -1445985_atTraf3ip2Traf3 interacting protein 2Chr10: 39.3369726.79910.1Chr14: 118.8616530.881710.00e+000.2250.1170.569
356 -1443871_atA430018A01RikESTs, Weakly similar to S57243 collagen alpha 1(I) chain precursor - mouse [M.musculus]Chr3: 100.4910567.12513.7Chr15: 13.1492480.881710.00e+00------
357 -1425279_atPdik1lPDLIM1 interacting kinase 1 likeChr4: 133.8326188.16713.1Chr15: 12.9720880.881710.00e+000.196-0.2860.157
358 -1459555_atRy1ESTsChr14: 64.4708827.69714.1Chr2: 67.6943500.880710.00e+00------
359 -1428576_atHif1anhypoxia-inducible factor 1, alpha subunit inhibitor; distal 3' UTRChr19: 44.6501839.55012.6ChrX: 127.0092660.880710.00e+00---0.2550.208
360 -1443709_atBE979452ESTsChr11: 86.7696647.21014.7Chr15: 13.1492480.880710.00e+00------
361 -1433344_at4930448K20RikRIKEN cDNA 4930448K20; exon 1Chr4: 9.8439968.14510.9Chr3: 10.3271010.880710.00e+00--0.1490.468
362 -1449586_atPkp1plakophilin 1 (ectodermal dysplasia/skin fragility syndrome, corneal epithelium); distal 3' UTRChr1: 137.7680148.04112.3Chr15: 13.1492480.880710.00e+000.259-0.2880.154
363 -1446133_atD2Ertd295eDNA segment, Chr 2, ERATO Doi 295, expressedChr2: 52.7778067.1828.9Chr14: 34.6092560.880710.00e+00------
364 -1445645_atH3104E01non-coding SINE sequence associated with H3104E01; putative 5' end of H3104E01 (antisense in intron of Adam22)Chr5: 8.2773848.02412.1ChrX: 112.6373530.879710.00e+00------
365 -1435244_atVav2Vav2 oncogene; distal 3' UTRChr2: 27.1179818.3169.3Chr1: 193.7319960.879710.00e+000.2840.2390.239
366 -1450538_s_atCma2chymase 2, mast cellChr14: 56.5916028.83712.1Chr14: 118.8616530.879710.00e+00---0.1110.589
367 -1431846_at4930546C10RikRIKEN cDNA 4930546C10 geneChr18: 69.0500547.15511.4Chr15: 13.1492480.879710.00e+00---0.2540.210
368 -1440711_atC630001G18RikRIKEN cDNA C630001G18 geneChr3: 84.1103497.22211.7ChrX: 112.6373530.879710.00e+00------
369 -1432210_at4933401H06RikRIKEN cDNA 4933401H06 geneChr3: 135.4969418.16914.5Chr19: 53.4592780.879710.00e+00------
370 -1417620_atRac2RAS-related C3 botulinum substrate 2; proximal 3' UTRChr15: 78.3917248.76211.8Chr2: 179.2574630.879710.00e+000.291-0.1870.359
371 -1433416_atRfx2regulatory factor X, 2 (influences HLA class II expression)Chr17: 56.9691536.52014.1Chr15: 13.1492480.879710.00e+000.265-0.0720.728
372 -1442532_atMov10l1Moloney leukemia virus 10-like 1Chr19: 28.0868616.72313.2Chr3: 10.3271010.879710.00e+000.262-0.2300.257
373 -1456964_atCpne1copine IChr2: 155.9193047.86510.2ChrX: 112.6373530.878710.00e+000.276-0.1470.474
374 -1426639_a_atTcf7l2transcription factor 7-like 2, T-cell specific, HMG-boxChr19: 55.8922657.75812.1Chr15: 13.1492480.878710.00e+000.3130.1070.602
375 -1444636_atNt5c1b5'-nucleotidase, cytosolic IBChr18: 83.0878326.92712.7Chr15: 13.1492480.878710.00e+000.165-0.1010.623
376 -1420219_atDnajc21DnaJ (Hsp40) homolog subfamily C member 21; last exon and 3' UTRChr15: 10.3910348.77213.3Chr15: 13.1492480.878710.00e+000.136-0.3000.137
377 -1418745_atOmdosteomodulinChr13: 49.6857437.64410.2Chr15: 13.1492480.878710.00e+000.303-0.0400.848
378 -1422904_atFmo2flavin containing monooxygenase 2; last exonChr1: 164.8069868.18216.1Chr15: 13.1492480.878710.00e+000.2480.3180.114
379 -1442588_at9530060I07adult male urinary bladder cDNA, RIKEN full-length enriched library, clone:9530060I07 product:unclassifiable, full insert sequence.Chr1: 98.7772617.48510.3Chr15: 13.1492480.878710.00e+00------
380 -1432550_at4930572D21RikRIKEN cDNA 4930572D21 geneChr7: 4.7354106.71013.7Chr19: 53.4592780.878710.00e+00------
381 -1439199_atPpp2caprotein phosphatase 2a, catalytic subunit, alpha isoformChr11: 51.9108268.55511.7Chr15: 13.1492480.878710.00e+000.292-0.0160.939
382 -1454165_at1700025K04RikRIKEN cDNA 1700025K04 geneChr10: 125.8115286.81511.1Chr15: 13.1492480.877710.00e+00------
383 -1430189_atNol4nucleolar protein 4Chr18: 23.1993248.00913.3Chr15: 13.1492480.877710.00e+000.1760.0510.803
384 -1456780_atBtrcbeta-transducin repeat containing proteinChr19: 45.5984607.22812.4Chr3: 10.3271010.877710.00e+000.3370.1070.603
385 -1453763_atTxndc11thioredoxin domain containing 11Chr16: 11.0875756.80815.1ChrX: 112.6373530.877710.00e+000.1270.0110.959
386 -1445988_atIl17dinterleukin 17D; intron 1 (from BG064506)Chr14: 58.1454448.25315.3Chr15: 13.1492480.877710.00e+000.1790.1940.343
387 -1427397_at2810046L04RikRIKEN cDNA 2810046L04 geneChr3: 53.2832457.84010.4Chr1: 193.7319960.877710.00e+000.1000.0600.771
388 -1424906_atE030024M05RikRIKEN cDNA E030024M05 geneChr12: 16.9957787.31610.0Chr3: 10.3271010.877710.00e+00------
389 -1447213_atSp100nuclear antigen Sp100Chr1: 87.5849378.02611.4Chr2: 180.8255810.877710.00e+000.255-0.1310.523
390 -1449529_s_atPrlpeprolactin-like protein EChr13: 27.7275596.50015.5Chr15: 13.1492480.876710.00e+000.235----
391 -1432400_atEpha1Eph receptor A1Chr6: 42.3170866.60611.6Chr15: 13.1492480.876710.00e+000.3480.0380.854
392 -1446843_atB930007P11RikRIKEN cDNA B930007P11 geneChr12: 48.5094047.09617.3Chr15: 13.1492480.876710.00e+00------
393 -1436806_atTrim62tripartite motif-containing 62; distal 3' UTRChr4: 128.5882949.80211.5Chr2: 179.2574630.876710.00e+000.2290.2090.305
394 -1432244_at4930507D10RikRIKEN cDNA 4930507D10 geneChr11: 80.6640286.91411.4Chr3: 10.3271010.876710.00e+00------
395 -1453234_at1300002K09RikRIKEN cDNA 1300002K09 geneChr4: 45.8992906.39012.1ChrX: 112.6373530.876710.00e+00---0.0560.785
396 -1431695_atRph3alrabphilin 3A-like (without C2 domains)Chr11: 75.7133318.24011.2Chr15: 13.1492480.875710.00e+000.214-0.2000.328
397 -1436360_atHkr2GLI-Kruppel family member HKR2; putative far 3' UTRChr7: 13.4939338.90912.3Chr15: 3.2291280.875710.00e+000.306----
398 -1460423_x_atIgk-V5immunoglobulin kappa chain variable 8 (V8)Chr6: 68.0716286.84712.8Chr15: 13.1492480.875710.00e+000.155----
399 -1426196_atLOC56304recombinant antineuraminidase single chain Ig VH and VL domains--6.77616.5Chr15: 13.1492480.875710.00e+00---0.0800.696
400 -1447742_atLaptm5lysosomal-associated protein transmembrane 5; distal 3' UTRChr4: 130.4918137.47011.8Chr19: 53.4592780.875710.00e+000.3190.0020.992
401 -1450596_atOlfr66olfactory receptor 66Chr7: 111.0298277.48114.2Chr2: 179.2574630.875710.00e+000.150-0.1190.563
402 -1440663_atXpo4exportin 4Chr14: 58.2587157.62813.0Chr15: 13.1492480.875710.00e+000.162-0.0970.637
403 -1435670_atTcfap2btranscription factor AP-2 beta (activating enhancer binding protein 2 beta, Char syndrome); distal 3' UTRChr1: 19.2280797.77211.6ChrX: 127.0092660.875710.00e+000.3730.1270.536
404 -1445681_atCdca7cell division cycle associated 7Chr2: 72.3171967.14812.8Chr3: 10.3271010.874710.00e+000.273-0.2550.208
405 -1444854_at1700023E05RikRIKEN cDNA 1700023E05 geneChr10: 81.6421028.7899.7Chr14: 118.8616530.874710.00e+00--0.2290.260
406 -1437512_x_atEbna1bp2EBNA1 binding protein 2Chr4: 118.2985127.67910.3Chr15: 13.1492480.874710.00e+000.271-0.2090.305
407 -1440931_atAW060659expressed sequence AW060659Chr2: 151.9209638.05012.6Chr15: 13.1492480.874710.00e+00------
408 -1444751_atA230106N23hypothetical protein A230106N23Chr15: 100.4293247.36113.5Chr2: 179.2574630.874710.00e+000.213----
409 -1447095_atC86942ESTsChr9: 103.4491907.49812.4ChrX: 112.6373530.874710.00e+00------
410 -1422957_atCcr3chemokine (C-C motif) receptor 3Chr9: 123.9443806.63012.1Chr15: 13.1492480.874710.00e+000.266-0.2380.242
411 -1459825_x_atIgf2bp1insulin-like growth factor 2 mRNA binding; far 3' UTRChr11: 95.8186309.1848.9Chr15: 13.1492480.874710.00e+00--0.0370.859
412 -1436149_atCox5bcytochrome c oxidase, subunit Vb; last intronChr1: 36.7499449.18911.5Chr2: 67.6943500.874710.00e+000.289-0.2520.215
413 -1458127_at6030446J10RikESTsChr9: 35.2485038.07214.4Chr15: 13.1492480.873710.00e+00------
414 -1445063_atRims1regulating synaptic membrane exocytosis 1; antisense in intronChr1: 22.2973727.67312.1ChrX: 112.6373530.873710.00e+000.2510.3620.069
415 -1429669_atGnao1guanine nucleotide binding protein, alpha o; intron 2Chr8: 96.3356678.24211.2ChrX: 112.6373530.873710.00e+000.2910.3160.116
416 -1457101_at2610017K16Rik0 day neonate thymus cDNA, RIKEN full-length enriched library, clone:A430035F24 product:unclassifiable, full insert sequence.Chr10: 19.4361007.19910.2Chr19: 53.4592780.873710.00e+00------
417 -1437373_atIsg20l1interferon stimulated exonuclease gene 20-like 1; far 3' UTRChr7: 86.0555369.20510.9Chr2: 179.2574630.873710.00e+000.162-0.1240.545
418 -1432651_at2510019K15RikRIKEN cDNA 2510019K15 geneChr2: 154.4264287.20713.7Chr19: 53.4592780.873710.00e+00------
419 -1431277_atPla2g6phospholipase A2, group VI; 5' UTRChr15: 79.1581348.9359.9Chr2: 178.9427860.873710.00e+000.305-0.1170.569
420 -1458995_atAA408296ESTsChr1: 194.9554806.82614.9ChrX: 112.6373530.873710.00e+000.243-0.0840.682
421 -1447324_atA230106N23ESTs--7.48311.4Chr3: 10.3271010.873710.00e+00------
422 -1425377_atWnt1wingless-related MMTV integration site 1; mid and distal 3' UTRChr15: 98.6236697.56115.9Chr15: 13.1492480.873710.00e+000.498-0.2970.140
423 -1449609_atAA189214Mus musculus transcribed sequencesChr4: 125.6431036.97312.0Chr15: 13.1492480.873710.00e+00------
424 -1437516_atLOC168850hypothetical protein LOC168850; 3' UTR (or last intron)Chr6: 28.1922617.86111.5ChrX: 127.0092660.873710.00e+00------
425 -1420144_x_atD930043C02Rikmembrane-associated nucleic acid binding proteinChr2: 37.2691017.37313.9Chr15: 13.1492480.873710.00e+000.188----
426 -1459225_atGnl3lguanine nucleotide binding protein-like 3 (nucleolar)-likeChrX: 147.4450327.70313.3Chr15: 13.1492480.872710.00e+000.2180.1510.461
427 -1437846_x_atBace2beta-site APP-cleaving enzyme 2; proximal 3' UTR (short probe set target sequence)Chr16: 97.6585837.12411.8Chr3: 10.3271010.872710.00e+000.2360.3290.100
428 -1433836_a_atC10orf10fasting-induced protein (decidual protein induced by progesterone); distal 3' UTRChr6: 116.60258911.81210.6Chr19: 53.4592780.872710.00e+000.252----
429 -1438453_atRad51cRad51 homolog c; proximal 3' UTRChr11: 87.1921217.80811.5Chr15: 13.1492480.872710.00e+000.197-0.2810.164
430 -1436171_atArhgap30Rho GTPase activating protein 30; mid to distal 3' UTRChr1: 173.3397748.02714.5Chr15: 13.1492480.872710.00e+00---0.1620.429
431 -1453982_at4933417N07RikRIKEN cDNA 4933417N07 geneChr1: 134.8583248.05211.8Chr15: 13.1492480.872710.00e+00------
432 -1460537_atEbf2early B-cell factor 2Chr14: 67.9603297.00210.8Chr15: 13.1492480.872710.00e+000.3930.0970.637
433 -1425579_atGfra2glial cell line derived neurotrophic factor family receptor alpha 2Chr14: 71.2900148.15710.3ChrX: 112.6373530.871710.00e+000.299-0.2160.289
434 -1457019_s_atNt5c1b5'-nucleotidase, cytosolic IBChr12: 10.3997588.19312.3Chr3: 10.3271010.871710.00e+000.165-0.1010.623
435 -1425150_atC730036D15RikRIKEN cDNA C730036D15 geneChr4: 49.3929847.08511.6Chr15: 13.1492480.871710.00e+000.069-0.1430.484
436 -1419703_atCol5a3procollagen, type V, alpha 3Chr9: 20.5745157.84311.2Chr5: 128.2649740.871710.00e+000.2780.1700.408
437 -1449451_atSerpinb11serine (or cysteine) proteinase inhibitor, clade B (ovalbumin), member 11Chr1: 109.2764947.29416.0Chr15: 13.1492480.871710.00e+000.180-0.3700.063
438 -1431384_atLOC374768LOC374768 protein; last exonChr11: 69.6346887.55511.2Chr15: 13.1492480.871710.00e+00------
439 -1432089_at4930550C17RikRIKEN cDNA 4930550C17 geneChr13: 56.3239917.65312.9Chr15: 13.1492480.871710.00e+00------
440 -1444781_atXab1XPA binding protein 1Chr5: 31.8141617.79310.8Chr19: 53.4592780.871710.00e+000.186-0.0380.855
441 -1421760_atPtcrapre T-cell antigen receptor alphaChr17: 46.8929157.00011.2Chr15: 3.2291280.871710.00e+000.316-0.2250.270
442 -1432570_at6030458E02RikRIKEN cDNA 6030458E02 geneChr19: 60.9906686.82615.0Chr3: 10.3271010.871710.00e+00------
443 -1449077_atCenppcentromere protein P; intron 4Chr13: 49.6340108.14612.7Chr3: 149.0891950.871710.00e+00--0.2960.142
444 -1421759_a_atFoxk1forkhead box K1Chr5: 142.9246779.22313.1Chr15: 13.1492480.871710.00e+000.3750.2540.211
445 -1444370_atC77058expressed sequence C77058Chr11: 77.0108697.96011.2ChrX: 112.6373530.871710.00e+00------
446 -1452553_atA330035P11RikRIKEN cDNA A330035P11 geneChr5: 34.2910367.14616.1Chr3: 149.0891950.871710.00e+00---0.0290.888
447 -1459148_atPax3paired box gene 3Chr1: 78.1046256.36014.1Chr15: 13.1492480.871710.00e+000.478-0.3920.048
448 -1447076_atNell1NEL-like 1, protein kinase C-binding protein NELL1; putative far 3' UTR (from EST AK079065)Chr7: 58.1202067.00213.8Chr15: 13.1492480.871710.00e+000.404-0.0850.679
449 -1430866_at4921537D05RikRIKEN cDNA 4921537D05 geneChr10: 94.2049327.19210.6Chr6: 89.2984460.871710.00e+00------
450 -1453528_atLta4hleukotriene A4 hydrolase (inflammatory response to infection, heart attack risk-associated); intron 1Chr10: 92.9165348.42510.1ChrX: 112.6373530.871710.00e+000.255-0.2990.137
451 -1457752_atAU014876expressed sequence AU014876Chr16: 16.5780376.94911.6Chr3: 10.3271010.870710.00e+00------
452 -1456237_x_atHeatr2HEAT repeat containing 2; mid 3' UTRChr5: 139.6620486.94112.4Chr15: 13.1492480.870710.00e+00------
453 -1430584_s_atCar3carbonic anhydrase 3Chr3: 14.8653777.38010.8Chr2: 179.2574630.870710.00e+000.3270.1470.473
454 -1458807_atEpb4.1erythrocyte protein band 4.1Chr4: 131.4901977.6959.8Chr3: 10.3271010.870710.00e+000.332-0.1640.424
455 -1432805_at8030431A06RikRIKEN cDNA 8030431A06 geneChr14: 33.6778808.84311.6Chr15: 13.1492480.870710.00e+00------
456 -1458553_atPlatplasminogen activator, tissueChr8: 23.8865087.30312.8Chr15: 13.1492480.869710.00e+000.3100.3080.125
457 -1446698_atLppLIM domain containing preferred translocation partner in lipomaChr16: 24.9725737.47314.3Chr3: 10.3271010.869710.00e+000.3130.2620.195
458 -1453951_a_at4930463G05RikRIKEN cDNA 4930463G05 geneChr19: 47.9048737.87811.8Chr2: 178.9427860.869710.00e+00------
459 -1443825_x_atSpaca3sperm acrosome associated 3; mid 3' UTRChr11: 80.6812477.16514.3Chr15: 13.1492480.869710.00e+000.241-0.1580.442
460 -1451719_atCrsp6cofactor required for Sp1 transcriptional activation, subunit 6Chr9: 15.0649067.75814.8Chr15: 13.1492480.869710.00e+000.211----
461 -1440939_atSept6septin 6ChrX: 34.4514057.62112.9Chr15: 13.1492480.869710.00e+000.2930.1080.600
462 -1424700_atTmem38btransmembrane protein 38B (trimeric intracellular cation channel type B); last exonChr4: 53.87287011.69511.9Chr2: 67.6943500.869710.00e+000.183-0.0230.912
463 -1424262_atIba2ionized calcium binding adapter 2; mid distal 3' UTRChr2: 31.82836711.3448.6Chr3: 10.3271010.869710.00e+000.174----
464 -1425233_at2210407C18RikRIKEN cDNA 2210407C18 geneChr11: 58.4217917.93111.2Chr2: 67.6943500.869710.00e+00---0.0430.837
465 -1439921_atAldh1a2aldehyde dehydrogenase 1 family, member A2; putative short transcript expressed from antisense of promoterChr9: 71.0597657.20111.5ChrX: 112.6373530.869710.00e+00--0.1780.384
466 -1452541_atEpb4.1l2erythrocyte protein band 4.1-like 2Chr10: 25.2195818.5489.4Chr15: 13.1492480.868710.00e+000.2790.0630.760
467 -1422935_x_atCbx1chromobox homolog 1 (Drosophila HP1 beta); last exon and 3' UTRChr11: 96.6679747.8679.3ChrX: 112.6373530.868710.00e+000.243-0.2150.292
468 -1443647_atRoraretinoic acid receptor-related orphan receptor alpha; antisense in intron 1Chr9: 68.5035967.46911.9Chr15: 13.1492480.868710.00e+000.387-0.0150.942
469 -1427727_x_atPsg19pregnancy specific glycoprotein 19Chr7: 19.3099358.22112.5Chr3: 10.3271010.868710.00e+000.233-0.3890.050
470 -1446347_atEplinESTs, Weakly similar to RIKEN cDNA 5730493B19 [] [M.musculus]Chr2: 92.8806648.85112.0Chr19: 53.4592780.868710.00e+00------
471 -1425244_a_atThegtesticular haploid expressed geneChr10: 79.0465087.86214.2Chr15: 13.1492480.868710.00e+000.197-0.0990.629
472 -1451823_atClca4chloride channel calcium activated 4Chr3: 144.4857666.69111.0ChrX: 112.6373530.868710.00e+000.280-0.0870.674
473 -1426101_atKlhl2kelch-like 2, Mayven (Drosophila)Chr8: 67.3315497.31211.1Chr15: 13.1492480.868710.00e+000.2600.2510.215
474 -1436730_atMclcmyeloid cell leukemia sequence 1, related sequence 1Chr3: 108.4811938.33014.3Chr15: 13.1492480.867710.00e+000.154----
475 -1444191_atBB2104610 day neonate lung cDNA, RIKEN full-length enriched library, clone:E030029G01 product:unclassifiable, full insert sequence.Chr12: 100.7863867.43114.1Chr15: 13.1492480.867710.00e+00------
476 -1456594_atBB728372polymorphic LTR from sequence in BB728372 (distal Chr 15 in BXD Eye data)Chr15: 103.2023976.75714.8Chr3: 10.3271010.867710.00e+00------
477 -1436785_a_at1110069O07RikRIKEN cDNA 1110069O07 geneChr11: 3.9788079.18611.9Chr15: 13.1492480.867710.00e+00--0.0820.689
478 -1447530_atF8afactor 8-associated gene A; proximal 3' UTRChrX: 70.47480512.01510.5Chr15: 13.1492480.867710.00e+000.2130.1650.421
479 -1427169_at2810455B10RikRIKEN cDNA 2810455B10 geneChr1: 62.3224568.06713.5Chr19: 53.4592780.867710.00e+000.200----
480 -1425333_at1810048P08RikRIKEN cDNA 1810048P08 geneChr6: 87.7388779.03811.4Chr1: 193.7319960.867710.00e+000.177----
481 -1442985_atHmcn1hemicentin (fibulin 6, extracellular matrix EGF-domain protein linked to macular degeneration); antisense in intron 7 from EST AU046291Chr1: 152.6600407.30111.5Chr15: 13.1492480.867710.00e+00--0.0410.841
482 -1456731_x_atPolr3kpolymerase (RNA) III (DNA directed) polypeptide KChr2: 181.6041308.92610.8Chr1: 193.7319960.867710.00e+000.249-0.1470.473
483 -1459063_at4833412C05RikESTsChr18: 61.3389776.53311.1Chr3: 10.3271010.866710.00e+00--0.4150.035
484 -1432888_at4930455M05RikRIKEN cDNA 4930455M05 geneChr13: 59.5207806.72814.8Chr3: 10.3271010.866710.00e+00------
485 -1447759_x_atCcdc22coiled-coil domain containing 22; 3' UTRChrX: 7.17093610.0088.9Chr3: 10.3271010.866710.00e+00------
486 -1452889_atLhppphospholysine phosphohistidine inorganic pyrophosphate phosphatase; mid and distal 3' UTRChr7: 139.89757210.04912.4ChrX: 112.6373530.866710.00e+000.202----
487 -1443490_atTpm3tropomyosin 3, gammaChr3: 89.8861638.45511.9Chr15: 13.1492480.866710.00e+000.3180.1960.337
488 -1430055_at1700080G18RikRIKEN cDNA 1700080G18 geneChr6: 29.9974407.97115.6Chr15: 13.1492480.866710.00e+00--0.1350.512
489 -1432682_at5530400K19RikRIKEN cDNA 5530400K19 geneChr1: 155.5631947.02414.8ChrX: 112.6373530.866710.00e+00------
490 -1418767_atCyp4f13cytochrome P450, family 4, subfamily f, polypeptide 13; last 5 exons and 3' UTRChr17: 33.0616469.00813.1Chr1: 193.7319960.866710.00e+000.2630.3320.098
491 -1418661_atAbhd2abhydrolase domain containing 2Chr7: 86.5007317.74617.9Chr15: 13.1492480.866710.00e+000.2430.1040.613
492 -1442360_atA930012M21RikRIKEN cDNA A930012M21 geneChr5: 132.1727686.77212.2Chr15: 13.1492480.866710.00e+00------
493 -1430715_at2310014L17RikRIKEN cDNA 2310014L17 geneChr7: 13.5147707.29613.5Chr15: 13.1492480.865710.00e+00---0.1340.513
494 -1444281_atCamk2dcalcium/calmodulin-dependent protein kinase II, delta; intron 2Chr3: 126.3068046.78912.4Chr1: 193.7319960.865710.00e+000.3170.3250.105
495 -1427911_at2610307O08RikRIKEN cDNA 2610307O08 geneChr18: 35.8939488.33312.0Chr15: 13.1492480.865710.00e+000.193----
496 -1444928_atSlc35f1solute carrier family 35, member F1; antisense in intron 1 (from AK141358)Chr10: 52.4143137.39611.6Chr15: 13.1492480.865710.00e+000.1120.2060.312
497 -1443318_atE130016E03RikRIKEN cDNA E130016E03 geneChr4: 11.5114657.71912.2ChrX: 112.6373530.865710.00e+00---0.3460.083
498 -1443687_x_atH2-DMb1histocompatibility 2, class II, locus Mb1Chr17: 33.7620287.79812.4Chr14: 118.8616530.865710.00e+000.2470.0770.708
499 -1425540_atOtcornithine transcarbamylaseChrX: 9.8934727.97612.5ChrX: 112.6373530.864710.00e+000.339-0.0770.710
500 -1420943_atZfp185zinc finger protein 185ChrX: 70.2750247.52211.0Chr2: 178.9427860.864710.00e+000.2390.0940.648

- - - -

-

- - - - - {% endblock %} \ No newline at end of file +{% extends "base.html" %} +{% block content %} + + + + + + + + {% for trait in correlation_data %} + + + + {% endfor %} + +
Correlation
{{ correlation_data[trait] }}
+{% endblock %} + + +{% block js %} + + + + + + + +{% endblock %} \ No newline at end of file -- cgit v1.2.3 From 466be48f92d4943995c7a3e7bcb9fd1efd775bf6 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 30 May 2013 23:14:50 +0000 Subject: Rewrote some code in get_trait_info in dataset.py Added spearman correlation to show_corr_results and template --- wqflask/base/data_set.py | 123 +++++++++++++---------- wqflask/base/trait.py | 4 +- wqflask/wqflask/correlation/show_corr_results.py | 36 ++++--- wqflask/wqflask/templates/correlation_page.html | 52 +++++++--- 4 files changed, 126 insertions(+), 89 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index c2380f8c..4c5c46a5 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -672,12 +672,13 @@ class MrnaAssayDataSet(DataSet): query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type, self.type, self.type)) - #XZ, 03/04/2009: Xiaodong changed Data to %sData and changed parameters from %(item,item, db.type,item,item) to %(db.type, item,item, db.type,item,item) + for item in sample_ids_step: query += """ left join {}Data as T{} on T{}.Id = {}XRef.DataId and T{}.StrainId={}\n """.format(*mescape(self.type, item, item, self.type, item, item)) + query += """ WHERE {}XRef.{}FreezeId = {}Freeze.Id and {}Freeze.Name = '{}' @@ -690,17 +691,19 @@ class MrnaAssayDataSet(DataSet): trait_count = len(trait_sample_data[0]) self.trait_data = collections.defaultdict(list) + # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values - for j in range(trait_count): - trait_name = trait_sample_data[0][j][0] - for i in range(int(number_chunks)): - self.trait_data[trait_name] += trait_sample_data[i][j][data_start_pos:] - + for trait_counter in range(trait_count): + trait_name = trait_sample_data[0][trait_counter][0] + for chunk_counter in range(int(number_chunks)): + self.trait_data[trait_name] += ( + trait_sample_data[chunk_counter][trait_counter][data_start_pos:]) + def get_trait_info(self, trait_list=None, species=''): - # Note: setting trait_list to [] is probably not a great idea. + # Note: setting trait_list to [] is probably not a great idea. if not trait_list: trait_list = [] @@ -709,9 +712,7 @@ class MrnaAssayDataSet(DataSet): if not this_trait.haveinfo: this_trait.retrieveInfo(QTL=1) - if this_trait.symbol: - pass - else: + if not this_trait.symbol: this_trait.symbol = "N/A" #XZ, 12/08/2008: description @@ -719,60 +720,56 @@ class MrnaAssayDataSet(DataSet): description_string = str(this_trait.description).strip() target_string = str(this_trait.probe_target_description).strip() - description_display = '' - if len(description_string) > 1 and description_string != 'None': description_display = description_string else: description_display = this_trait.symbol - if len(description_display) > 1 and description_display != 'N/A' and len(target_string) > 1 and target_string != 'None': + if (len(description_display) > 1 and description_display != 'N/A' and + len(target_string) > 1 and target_string != 'None'): description_display = description_display + '; ' + target_string.strip() # Save it for the jinja2 template this_trait.description_display = description_display - #print(" xxxxdd [%s]: %s" % (type(this_trait.description_display), description_display)) #XZ: trait_location_value is used for sorting trait_location_repr = 'N/A' trait_location_value = 1000000 if this_trait.chr and this_trait.mb: - try: - trait_location_value = int(this_trait.chr)*1000 + this_trait.mb - except: - if this_trait.chr.upper() == 'X': - trait_location_value = 20*1000 + this_trait.mb - else: - trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb - - this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, float(this_trait.mb) ) + #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y") + #This is so we can convert the location to a number used for sorting + trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb) + #try: + # trait_location_value = int(this_trait.chr)*1000 + this_trait.mb + #except ValueError: + # if this_trait.chr.upper() == 'X': + # trait_location_value = 20*1000 + this_trait.mb + # else: + # trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 + + # this_trait.mb) + + #ZS: Put this in function currently called "convert_location_to_value" + this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr, + float(this_trait.mb)) this_trait.location_value = trait_location_value - #this_trait.trait_location_value = trait_location_value - #XZ, 01/12/08: This SQL query is much faster. + #Get mean expression value query = ( -"""select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = %s and - ProbeSet.Id = ProbeSetXRef.ProbeSetId and - ProbeSet.Name = '%s' + """select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = %s and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSet.Name = '%s' """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) print("query is:", pf(query)) result = g.db.execute(query).fetchone() + + mean = result[0] if result else 0 - if result: - if result[0]: - mean = result[0] - else: - mean=0 - else: - mean = 0 - - #XZ, 06/05/2009: It is neccessary to turn on nowrap - this_trait.mean = repr = "%2.3f" % mean + this_trait.mean = "%2.3f" % mean #LRS and its location this_trait.LRS_score_repr = 'N/A' @@ -791,23 +788,39 @@ class MrnaAssayDataSet(DataSet): result = self.cursor.fetchone() if result: - if result[0] and result[1]: - LRS_Chr = result[0] - LRS_Mb = result[1] - - #XZ: LRS_location_value is used for sorting - try: - LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb) - except: - if LRS_Chr.upper() == 'X': - LRS_location_value = 20*1000 + float(LRS_Mb) - else: - LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb) + #if result[0] and result[1]: + # lrs_chr = result[0] + # lrs_mb = result[1] + lrs_chr, lrs_mb = result + #XZ: LRS_location_value is used for sorting + lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb) + + #try: + # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb) + #except: + # if lrs_chr.upper() == 'X': + # lrs_location_value = 20*1000 + float(lrs_mb) + # else: + # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 + + # float(lrs_mb)) + + this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs + this_trait.LRS_score_value = this_trait.lrs + this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) + + + def convert_location_to_value(chromosome, mb): + try: + location_value = int(chromosome)*1000 + float(mb) + except ValueError: + if chromosome.upper() == 'X': + location_value = 20*1000 + float(mb) + else: + location_value = (ord(str(chromosome).upper()[0])*1000 + + float(mb)) + + return location_value - this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs - this_trait.LRS_score_value = LRS_score_value = this_trait.lrs - this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb) ) - def get_sequence(self): query = """ SELECT diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 7c1c035c..5fde114f 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -15,7 +15,7 @@ from pprint import pformat as pf from flask import Flask, g -class GeneralTrait: +class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, Published phenotype, genotype, or user input trait @@ -78,7 +78,7 @@ class GeneralTrait: #desc = self.handle_pca(desc) stringy = desc return stringy - + def display_name(self): diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index aa20eba1..5d40c835 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -30,7 +30,6 @@ from __future__ import absolute_import, print_function, division import string -from math import * import cPickle import os import time @@ -106,6 +105,7 @@ class CorrelationResults(object): corr_samples_group = start_vars['corr_samples_group'] self.sample_data = {} + self.corr_method = start_vars['corr_sample_method'] #The two if statements below append samples to the sample list based upon whether the user #rselected Primary Samples Only, Other Samples Only, or All Samples @@ -123,27 +123,31 @@ class CorrelationResults(object): #if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) + self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() + self.correlation_data = {} for trait, values in self.target_dataset.trait_data.iteritems(): - trait_values = [] + this_trait_values = [] target_values = [] for index, sample in enumerate(self.target_dataset.samplelist): - target_value = values[index] - if sample in self.sample_data.keys(): - this_value = self.sample_data[sample] - trait_values.append(this_value) - target_values.append(target_value) - (trait_values, target_values) = normalize_values(trait_values, target_values) - correlation = scipy.stats.pearsonr(trait_values, target_values) - #correlation = cal_correlation(trait_values, target_values) - self.correlation_data[trait] = correlation[0] - #print ('correlation result: %s %s' % (trait, correlation)) - - for trait in self.correlation_data: - print("correlation: ", self.correlation_data[trait]) - + if sample in self.sample_data: + sample_value = self.sample_data[sample] + target_sample_value = values[index] + this_trait_values.append(sample_value) + target_values.append(target_sample_value) + + this_trait_values, target_values = normalize_values(this_trait_values, target_values) + if self.corr_method == 'pearson': + sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values) + else: + sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values) + self.correlation_data[trait] = [sample_r, sample_p] + self.correlation_data = collections.OrderedDict( + sorted(self.correlation_data.items(), + key=lambda t: -abs(t[1][0]))) + #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index be750a0c..68fe81ed 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -1,21 +1,42 @@ {% extends "base.html" %} -{% block content %} - - - - - - - - {% for trait in correlation_data %} - - - - {% endfor %} - -
Correlation
{{ correlation_data[trait] }}
+{% block css %} + + + + {% endblock %} +{% block content %} +
+
+

Correlation

+
+
+ + + + + + {% if corr_method == 'pearson' %} + + + {% else %} + + + {% endif %} + + + + {% for trait in correlation_data %} + + + + + + {% endfor %} + +
TraitSample rSample p(r)Sample rhoSample p(rho)
{{ trait }}{{ correlation_data[trait][0] }}{{ correlation_data[trait][1] }}
+{% endblock %} {% block js %} @@ -23,7 +44,6 @@ - @@ -274,9 +175,14 @@ - @@ -175,16 +275,9 @@ + {% endblock %} + diff --git a/wqflask/wqflask/templates/show_trait_details.html b/wqflask/wqflask/templates/show_trait_details.html index c3abfc9f..b57c3c21 100644 --- a/wqflask/wqflask/templates/show_trait_details.html +++ b/wqflask/wqflask/templates/show_trait_details.html @@ -19,11 +19,11 @@ BLAT Specifity -
{{ "%.1f" % (this_trait.probe_set_specificity) }}
+
{{ "%s" % (this_trait.probe_set_specificity) }}
{% endif %} {% if this_trait.probe_set_blat_score %}
BLAT Score
-
{{ "%i" % (this_trait.probe_set_blat_score) }}
+
{{ "%s" % (this_trait.probe_set_blat_score) }}
{% endif %} -- cgit v1.2.3 From 25bd2fa7ac229eb7862fe778fe03eb75ff34368c Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 13 Jun 2013 21:13:51 +0000 Subject: Fixed issue where too much memory was used as a result of creating a dataset object for each trait in the correlation results Added new fields/columns for each trait in the correlation result table (max LRS, max LRS location, mean expression) Fixed error if trait doesn't have these fields --- wqflask/base/data_set.py | 30 +++---- wqflask/base/trait.py | 27 +++++-- wqflask/utility/helper_functions.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 99 +++++++++++------------- wqflask/wqflask/search_results.py | 2 +- 5 files changed, 83 insertions(+), 77 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 0c7676c4..0903bf16 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -90,8 +90,8 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): - print("length of self.markers:", len(self.markers)) - print("length of p_values:", len(p_values)) + #print("length of self.markers:", len(self.markers)) + #print("length of p_values:", len(p_values)) # THIS IS only needed for the case when we are limiting the number of p-values calculated if len(self.markers) < len(p_values): @@ -161,7 +161,7 @@ class DatasetGroup(object): self.f1list = None self.parlist = None self.get_f1_parent_strains() - print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) + #print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) self.species = webqtlDatabaseFunction.retrieve_species(self.name) @@ -170,7 +170,7 @@ class DatasetGroup(object): def get_markers(self): - print("self.species is:", self.species) + #print("self.species is:", self.species) if self.species == "human": marker_class = HumanMarkers else: @@ -293,14 +293,14 @@ class DataSet(object): self.name, self.name, self.name)) - print("query_args are:", query_args) + #print("query_args are:", query_args) - print(""" - SELECT Id, Name, FullName, ShortName - FROM %s - WHERE public > %s AND - (Name = '%s' OR FullName = '%s' OR ShortName = '%s') - """ % (query_args)) + #print(""" + # SELECT Id, Name, FullName, ShortName + # FROM %s + # WHERE public > %s AND + # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') + # """ % (query_args)) self.id, self.name, self.fullname, self.shortname = g.db.execute(""" SELECT Id, Name, FullName, ShortName @@ -624,12 +624,12 @@ class MrnaAssayDataSet(DataSet): and ProbeSetFreezeId = {} """.format(escape(str(self.id))) results = g.db.execute(query).fetchall() - print("After get_trait_list query") + #print("After get_trait_list query") trait_data = {} for trait in results: print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) - print("After retrieve_sample_data") + #print("After retrieve_sample_data") return trait_data def get_trait_data(self): @@ -763,7 +763,7 @@ class MrnaAssayDataSet(DataSet): """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - print("query is:", pf(query)) + #print("query is:", pf(query)) result = g.db.execute(query).fetchone() @@ -926,7 +926,7 @@ class TempDataSet(DataSet): def geno_mrna_confidentiality(ob): dataset_table = ob.type + "Freeze" - print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) + #print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table) diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 53f41779..f333d5a7 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,6 +1,8 @@ from __future__ import absolute_import, division, print_function import string +import resource + from htmlgen import HTMLgen2 as HT @@ -15,6 +17,10 @@ from pprint import pformat as pf from flask import Flask, g +def print_mem(stage=""): + mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print("{}: {}".format(stage, mem/1024)) + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -23,8 +29,12 @@ class GeneralTrait(object): """ def __init__(self, **kw): - #print("in GeneralTrait") - self.dataset = kw.get('dataset') # database name + # xor assertion + assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. xor name"; + if kw.get('dataset_name'): + self.dataset = create_dataset(kw.get('dataset_name')) + else: + self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') @@ -39,8 +49,6 @@ class GeneralTrait(object): # self.cellid is set to None above elif len(name2) == 3: self.dataset, self.name, self.cellid = name2 - - self.dataset = create_dataset(self.dataset) # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary @@ -355,8 +363,17 @@ class GeneralTrait(object): #traitQTL = self.cursor.fetchone() if traitQTL: self.locus, self.lrs, self.pvalue, self.mean = traitQTL + if self.locus: + result = g.db.execute(""" + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '%s' and + Geno.Name = '%s' and + Geno.SpeciesId = Species.Id + """, (species, self.locus)).fetchone() + self.locus_chr = result[0] + self.locus_mb = result[1] else: - self.locus = self.lrs = self.pvalue = self.mean = "" + self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = "" if self.dataset.type == 'Publish': traitQTL = g.db.execute(""" SELECT diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 28242c27..d76a32ce 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -9,7 +9,7 @@ def get_species_dataset_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" self.dataset = data_set.create_dataset(start_vars['dataset']) self.species = TheSpecies(dataset=self.dataset) - self.this_trait = GeneralTrait(dataset=self.dataset.name, + self.this_trait = GeneralTrait(dataset=self.dataset, name=start_vars['trait_id'], cellid=None) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 96c0155b..3b8b7ba2 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -92,11 +92,6 @@ class CorrelationResults(object): # #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1} - - #def error(self, message, *args, **kw): - # heading = heading or self.PAGE_HEADING - # return templatePage.error(heading = heading, detail = [message], error=error) - def __init__(self, start_vars): # get trait list from db (database name) # calculate correlation with Base vector and targets @@ -104,10 +99,8 @@ class CorrelationResults(object): #self.this_trait = GeneralTrait(dataset=self.dataset.name, # name=start_vars['trait_id'], # cellid=None) - #print("start_vars: ", pf(start_vars)) with Bench("Doing correlations"): - print_mem("At beginning") helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() @@ -138,7 +131,6 @@ class CorrelationResults(object): self.correlation_data = {} - print_mem("Before calculating correlations") for trait, values in self.target_dataset.trait_data.iteritems(): this_trait_values = [] target_values = [] @@ -150,63 +142,60 @@ class CorrelationResults(object): target_values.append(target_sample_value) this_trait_values, target_values = normalize_values(this_trait_values, target_values) - + if self.corr_method == 'pearson': sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values) else: sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values) - + self.correlation_data[trait] = [sample_r, sample_p] - - print_mem("After calculating correlations") - + self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), key=lambda t: -abs(t[1][0]))) - + self.correlation_data_slice = collections.OrderedDict() - - old_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - + for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]): - print_mem("In trait info loop") - print("\nTrait #:", trait_counter) - print_mem("Before trait_object") - trait_object = GeneralTrait(dataset=self.dataset.name, name=trait) - print_mem("After trait object") - trait_info = dict( - correlation = float(self.correlation_data[trait][0]), - p_value = float(self.correlation_data[trait][1]), - symbol = trait_object.symbol, - alias = trait_object.alias, - description = trait_object.description, - chromosome = trait_object.chr, - mb = trait_object.mb - ) - print_mem("Before deleting trait object") - del trait_object - print_mem("After deleting trait object") - gc.collect() - print_mem("After colleting garabage") - print("** trait_info:", pf(trait_info)) - print("\n** Start trait_info") - counter = 1 - for key, value in trait_info.iteritems(): - print(" <{}> [{}] {}: [{}] {}\n".format( - counter, type(key), key, type(value), value)) - counter += 1 - print("** Done trait_info") + trait_object = GeneralTrait(dataset=self.dataset, name=trait) + if self.dataset.type == 'ProbeSet': + trait_info = collections.OrderedDict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) + if hasattr(trait_object, 'mean'): + trait_info[mean] = trait_object.mean + if hasattr(trait_object, 'lrs'): + trait_info[lrs] = trait_object.lrs + if hasattr(trait_object, 'locus_chr'): + trait_info[locus_chr] = trait_object.locus_chr + if hasattr(trait_object, 'locus_mb'): + trait_info[locus_mb] = trait_object.locus_mb + elif self.dataset.type == 'Geno': + trait_info = collections.OrderedDict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) + else: # 'Publish' + trait_info = collections.OrderedDict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) self.correlation_data_slice[trait] = trait_info - #self.correlation_data_slice[trait].append(trait_object) - - new_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - print("Memory difference:", new_memory_usage-old_memory_usage) - old_memory_usage = new_memory_usage - print_mem("End of purple loop") - print("*************************** End purple ******** ") - - print_mem("After getting trait info") - print("Garbage colleting...") - gc.collect() #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index dc872a8b..e171f1ab 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -106,7 +106,7 @@ class SearchResultPage(object): print("foo locals are:", locals()) trait_id = result[0] - this_trait = GeneralTrait(dataset=self.dataset.name, name=trait_id) + this_trait = GeneralTrait(dataset=self.dataset, name=trait_id) this_trait.retrieve_info(QTL=True) self.trait_list.append(this_trait) -- cgit v1.2.3 From 6d5a94c699d1653a3ca76a9500082b8803cdaedf Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Wed, 19 Jun 2013 20:58:40 +0000 Subject: Changed QTL parameter name to "get_qtl_info" on trait.py and other places it's called, like search_results.py Added other trait info fields to correlation results page --- wqflask/base/trait.py | 17 ++- wqflask/wqflask/correlation/show_corr_results.py | 126 ++++++++++++----------- wqflask/wqflask/search_results.py | 2 +- wqflask/wqflask/templates/correlation_page.html | 33 +++--- 4 files changed, 98 insertions(+), 80 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index f333d5a7..82e013ae 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -28,7 +28,7 @@ class GeneralTrait(object): """ - def __init__(self, **kw): + def __init__(self, get_qtl_info=False, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. xor name"; if kw.get('dataset_name'): @@ -41,6 +41,14 @@ class GeneralTrait(object): self.haveinfo = kw.get('haveinfo', False) self.sequence = kw.get('sequence') # Blat sequence, available for ProbeSet self.data = kw.get('data', {}) + + # Sets defaultst + self.locus = None + self.lrs = None + self.pvalue = None + self.mean = None + self.num_overlap = None + if kw.get('fullname'): name2 = value.split("::") @@ -52,8 +60,9 @@ class GeneralTrait(object): # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary - self.retrieve_info() + self.retrieve_info(get_qtl_info=get_qtl_info) self.retrieve_sample_data() + def get_name(self): @@ -237,7 +246,7 @@ class GeneralTrait(object): #def items(self): # return self.__dict__.items() - def retrieve_info(self, QTL=False): + def retrieve_info(self, get_qtl_info=False): assert self.dataset, "Dataset doesn't exist" if self.dataset.type == 'Publish': query = """ @@ -347,7 +356,7 @@ class GeneralTrait(object): if result: self.homologeneid = result[0] - if QTL: + if get_qtl_info: if self.dataset.type == 'ProbeSet' and not self.cellid: traitQTL = g.db.execute(""" SELECT diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 3b8b7ba2..3b1ac87d 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -13,19 +13,10 @@ # This program is available from Source Forge: at GeneNetwork Project # (sourceforge.net/projects/genenetwork/). # -# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010) -# at rwilliams@uthsc.edu and xzhou15@uthsc.edu -# +# Contact Dr. Robert W. Williams at rwilliams@uthsc.edu # # # This module is used by GeneNetwork project (www.genenetwork.org) -# -# Created by GeneNetwork Core Team 2010/08/10 -# -# Last updated by NL 2011/02/11 -# Last updated by Christian Fernandez 2012/04/07 -# Refactored correlation calculation into smaller functions in preparation of -# separating html from existing code from __future__ import absolute_import, print_function, division @@ -34,7 +25,6 @@ import string import cPickle import os import time -#import pyXLWriter as xl import pp import math import collections @@ -103,29 +93,29 @@ class CorrelationResults(object): with Bench("Doing correlations"): helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() - + corr_samples_group = start_vars['corr_samples_group'] - + self.sample_data = {} self.corr_method = start_vars['corr_sample_method'] - + #The two if statements below append samples to the sample list based upon whether the user #rselected Primary Samples Only, Other Samples Only, or All Samples - + primary_samples = (self.dataset.group.parlist + self.dataset.group.f1list + self.dataset.group.samplelist) - + #If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, primary_samples, ()) - + #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and #exclude the primary samples (because they would have been added in the previous #if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples) - + self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() @@ -141,61 +131,72 @@ class CorrelationResults(object): this_trait_values.append(sample_value) target_values.append(target_sample_value) - this_trait_values, target_values = normalize_values(this_trait_values, target_values) + this_trait_values, target_values, num_overlap = normalize_values(this_trait_values, + target_values) if self.corr_method == 'pearson': sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values) else: sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values) - self.correlation_data[trait] = [sample_r, sample_p] + self.correlation_data[trait] = [sample_r, sample_p, num_overlap] self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), key=lambda t: -abs(t[1][0]))) - self.correlation_data_slice = collections.OrderedDict() + self.correlation_results = [] + + #self.correlation_data_slice = collections.OrderedDict() for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]): - trait_object = GeneralTrait(dataset=self.dataset, name=trait) - if self.dataset.type == 'ProbeSet': - trait_info = collections.OrderedDict( - correlation = float(self.correlation_data[trait][0]), - p_value = float(self.correlation_data[trait][1]), - symbol = trait_object.symbol, - alias = trait_object.alias, - description = trait_object.description, - chromosome = trait_object.chr, - mb = trait_object.mb - ) - if hasattr(trait_object, 'mean'): - trait_info[mean] = trait_object.mean - if hasattr(trait_object, 'lrs'): - trait_info[lrs] = trait_object.lrs - if hasattr(trait_object, 'locus_chr'): - trait_info[locus_chr] = trait_object.locus_chr - if hasattr(trait_object, 'locus_mb'): - trait_info[locus_mb] = trait_object.locus_mb - elif self.dataset.type == 'Geno': - trait_info = collections.OrderedDict( - correlation = float(self.correlation_data[trait][0]), - p_value = float(self.correlation_data[trait][1]), - symbol = trait_object.symbol, - alias = trait_object.alias, - description = trait_object.description, - chromosome = trait_object.chr, - mb = trait_object.mb - ) - else: # 'Publish' - trait_info = collections.OrderedDict( - correlation = float(self.correlation_data[trait][0]), - p_value = float(self.correlation_data[trait][1]), - symbol = trait_object.symbol, - alias = trait_object.alias, - description = trait_object.description, - chromosome = trait_object.chr, - mb = trait_object.mb - ) - self.correlation_data_slice[trait] = trait_info + trait_object = GeneralTrait(dataset=self.dataset, name=trait, get_qtl_info=True) + trait_object.sample_r = self.correlation_data[trait][0] + trait_object.sample_p = self.correlation_data[trait][1] + trait_object_num_overlap = self.correlation_data[trait][2] + self.correlation_results.append(trait_object) + + #self.correlation_data_slice[trait] = self.correlation_data[trait] + #self.correlation_data_slice[trait].append(trait_object) + #if self.dataset.type == 'ProbeSet': + # trait_info = collections.OrderedDict( + # correlation = float(self.correlation_data[trait][0]), + # p_value = float(self.correlation_data[trait][1]), + # symbol = trait_object.symbol, + # alias = trait_object.alias, + # description = trait_object.description, + # chromosome = trait_object.chr, + # mb = trait_object.mb + # ) + # if trait_object.mean: + # trait_info[mean] = trait_object.mean + # if hasattr(trait_object, 'mean'): + # trait_info[mean] = trait_object.mean + # if hasattr(trait_object, 'lrs'): + # trait_info[lrs] = trait_object.lrs + # if hasattr(trait_object, 'locus_chr'): + # trait_info[locus_chr] = trait_object.locus_chr + # if hasattr(trait_object, 'locus_mb'): + # trait_info[locus_mb] = trait_object.locus_mb + #elif self.dataset.type == 'Geno': + # trait_info = collections.OrderedDict( + # correlation = float(self.correlation_data[trait][0]), + # p_value = float(self.correlation_data[trait][1]), + # symbol = trait_object.symbol, + # alias = trait_object.alias, + # description = trait_object.description, + # chromosome = trait_object.chr, + # mb = trait_object.mb + # ) + #else: # 'Publish' + # trait_info = collections.OrderedDict( + # correlation = float(self.correlation_data[trait][0]), + # p_value = float(self.correlation_data[trait][1]), + # symbol = trait_object.symbol, + # alias = trait_object.alias, + # description = trait_object.description, + # chromosome = trait_object.chr, + # mb = trait_object.mb + # ) #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset @@ -1067,8 +1068,9 @@ def normalize_values(values_1, values_2): if values_1[i]!= None and values_2[i]!= None: X.append(values_1[i]) Y.append(values_2[i]) + num_overlap = len(X) - return (X, Y) + return (X, Y, num_overlap) def cal_correlation(values_1, values_2): diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index e171f1ab..4238aa7f 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -107,7 +107,7 @@ class SearchResultPage(object): print("foo locals are:", locals()) trait_id = result[0] this_trait = GeneralTrait(dataset=self.dataset, name=trait_id) - this_trait.retrieve_info(QTL=True) + this_trait.retrieve_info(get_qtl_info=True) self.trait_list.append(this_trait) self.dataset.get_trait_info(self.trait_list, species) diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index b06f7096..efbf689c 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -17,30 +17,37 @@ Trait + Symbol + Alias + Description + Location + Mean Expr + Max LRS + Max LRS Location {% if corr_method == 'pearson' %} Sample r + N Cases Sample p(r) {% else %} Sample rho Sample p(rho) {% endif %} - Symbol - Alias - Description - Location - {% for trait in correlation_data_slice %} + {% for trait in correlation_results %} - {{ trait }} - {{ correlation_data_slice[trait].correlation }} - {{ correlation_data_slice[trait].p_value }} - {{ correlation_data_slice[trait].symbol }} - {{ correlation_data_slice[trait].alias }} - {{ correlation_data_slice[trait].description }} - Chr{{ correlation_data_slice[trait].chromosome }}: {{ correlation_data_slice[trait].mb }} - {# {{ correlation_data_slice[trait][2].__dict__ }} #} + {{ trait.name }} + {{ trait.symbol }} + {{ trait.alias }} + {{ trait.description }} + Chr{{ trait.chr }}: {{ trait.mb }} + {{ trait.mean }} + {{ trait.lrs }} + Chr{{ trait.locus_chr }}: {{ trait.locus_mb }} + {{ trait.sample_r }} + {{ trait.num_overlap }} + {{ trait.sample_p }} {% endfor %} -- cgit v1.2.3 From 083e8a548f53c20d98d02a02093ac421673d6c03 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Wed, 19 Jun 2013 22:16:21 +0000 Subject: Fixed issue where the Mean Expr, Max LRS, and Max LRS Location columns in the correlation page weren't displaying --- wqflask/base/trait.py | 61 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 24 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 38b3a625..801d32c2 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -268,7 +268,7 @@ class GeneralTrait(object): PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - traitInfo = g.db.execute(query).fetchone() + trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. elif self.dataset.type == 'ProbeSet': @@ -285,8 +285,8 @@ class GeneralTrait(object): """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name)) - traitInfo = g.db.execute(query).fetchone() - #print("traitInfo is: ", pf(traitInfo)) + trait_info = g.db.execute(query).fetchone() + #print("trait_info is: ", pf(trait_info)) #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif self.dataset.type == 'Geno': @@ -303,23 +303,24 @@ class GeneralTrait(object): """ % (escape(display_fields_string), escape(self.dataset.name), escape(self.name)) - traitInfo = g.db.execute(query).fetchone() - #print("traitInfo is: ", pf(traitInfo)) + trait_info = g.db.execute(query).fetchone() + #print("trait_info is: ", pf(trait_info)) else: #Temp type query = """SELECT %s FROM %s WHERE Name = %s """ % (string.join(self.dataset.display_fields,','), self.dataset.type, self.name) - traitInfo = g.db.execute(query).fetchone() + trait_info = g.db.execute(query).fetchone() #self.cursor.execute(query) - #traitInfo = self.cursor.fetchone() - if traitInfo: + #trait_info = self.cursor.fetchone() + if trait_info: self.haveinfo = True #XZ: assign SQL query result to trait attributes. for i, field in enumerate(self.dataset.display_fields): - setattr(self, field, str(traitInfo[i])) + print(" mike: {} -> {} - {}".format(field, type(trait_info[i]), trait_info[i])) + setattr(self, field, trait_info[i]) if self.dataset.type == 'Publish': self.confidential = 0 @@ -327,6 +328,10 @@ class GeneralTrait(object): self.confidential = 1 self.homologeneid = None + + print("self.geneid is:", self.geneid) + print(" type:", type(self.geneid)) + print("self.dataset.group.name is:", self.dataset.group.name) if self.dataset.type == 'ProbeSet' and self.dataset.group and self.geneid: #XZ, 05/26/2010: From time to time, this query get error message because some geneid values in database are not number. #XZ: So I have to test if geneid is number before execute the query. @@ -338,6 +343,8 @@ class GeneralTrait(object): # geneidIsNumber = False #if geneidIsNumber: + + query = """ SELECT HomologeneId @@ -349,6 +356,7 @@ class GeneralTrait(object): InbredSet.SpeciesId = Species.Id AND Species.TaxonomyId = Homologene.TaxonomyId """ % (escape(str(self.geneid)), escape(self.dataset.group.name)) + print("-> query is:", query) result = g.db.execute(query).fetchone() #else: # result = None @@ -358,33 +366,38 @@ class GeneralTrait(object): if get_qtl_info: if self.dataset.type == 'ProbeSet' and not self.cellid: - traitQTL = g.db.execute(""" + query = """ SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean FROM ProbeSetXRef, ProbeSet WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSet.Name = "%s" AND - ProbeSetXRef.ProbeSetFreezeId =%s - """, (self.name, self.dataset.id)).fetchone() + ProbeSet.Name = "{}" AND + ProbeSetXRef.ProbeSetFreezeId ={} + """.format(self.name, self.dataset.id) + trait_qtl = g.db.execute(query).fetchone() #self.cursor.execute(query) - #traitQTL = self.cursor.fetchone() - if traitQTL: - self.locus, self.lrs, self.pvalue, self.mean = traitQTL + #trait_qtl = self.cursor.fetchone() + if trait_qtl: + self.locus, self.lrs, self.pvalue, self.mean = trait_qtl if self.locus: - result = g.db.execute(""" + query = """ select Geno.Chr, Geno.Mb from Geno, Species - where Species.Name = '%s' and - Geno.Name = '%s' and + where Species.Name = '{}' and + Geno.Name = '{}' and Geno.SpeciesId = Species.Id - """, (species, self.locus)).fetchone() + """.format(self.dataset.group.species, self.locus) + print("query is:", query) + result = g.db.execute(query).fetchone() self.locus_chr = result[0] self.locus_mb = result[1] else: self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = "" + + if self.dataset.type == 'Publish': - traitQTL = g.db.execute(""" + trait_qtl = g.db.execute(""" SELECT PublishXRef.Locus, PublishXRef.LRS FROM @@ -395,9 +408,9 @@ class GeneralTrait(object): PublishFreeze.Id =%s """, (self.name, self.dataset.id)).fetchone() #self.cursor.execute(query) - #traitQTL = self.cursor.fetchone() - if traitQTL: - self.locus, self.lrs = traitQTL + #trait_qtl = self.cursor.fetchone() + if trait_qtl: + self.locus, self.lrs = trait_qtl else: self.locus = self.lrs = "" else: -- cgit v1.2.3