From fdd28defcaf3326f3c6b6507124708d83a1da119 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Sun, 15 Apr 2018 11:57:09 +0300 Subject: Deactivate analysis of email_address field * Prevent elasticsearch from analysing and tokenising the email_address field so as to avoid issue with getting back all email addresses with the same domain as the one being searched for. --- wqflask/utility/elasticsearch_tools.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'wqflask/utility/elasticsearch_tools.py') diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index d35cb5ee..7d2ee8c9 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -24,6 +24,8 @@ def get_elasticsearch_connection(): "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT }]) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None + setup_users_index(es) + es_logger = logging.getLogger("elasticsearch") es_logger.setLevel(logging.INFO) es_logger.addHandler(logging.NullHandler()) @@ -33,6 +35,17 @@ def get_elasticsearch_connection(): return es +def setup_users_index(es_connection): + if es_connection: + index_settings = { + "properties": { + "email_address": { + "type": "string" + , "index": "not_analyzed"}}} + + es_connection.indices.create(index='users', ignore=400) + es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local") + def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type) -- cgit v1.2.3 From dda4697505aea2cd950533dfb3a0dfb0e66ec018 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 16 Apr 2018 09:00:52 +0000 Subject: Docs on elasticsearch use --- README.md | 24 ++++++++++++++------ bin/test-website | 2 +- wqflask/utility/elasticsearch_tools.py | 41 ++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 8 deletions(-) (limited to 'wqflask/utility/elasticsearch_tools.py') diff --git a/README.md b/README.md index 3e7e64d0..59645994 100644 --- a/README.md +++ b/README.md @@ -17,25 +17,35 @@ deploy GN2 and dependencies as a self contained unit on any machine. The database can be run separately as well as the source tree (for developers). See the [installation docs](doc/README.org). -## Test +## Run Once installed GN2 can be run online through a browser interface ```sh -./bin/genenetwork2 +genenetwork2 ``` -(default is http://localhost:5003/). For more examples, including running scripts and a Python REPL -see the startup script [./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/bin/genenetwork2). +(default is http://localhost:5003/). For full examples (you'll need to +set a number of environment variables), including running scripts and +a Python REPL, see the startup script +[./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/bin/genenetwork2). +## Testing -We are building up automated -testing using [mechanize](https://github.com/genenetwork/genenetwork2/tree/master/test/lib) which can be run with +We are building 'Mechanical Rob' automated testing using Python +[requests](https://github.com/genenetwork/genenetwork2/tree/master/test/lib) +which can be run with something like ```sh -./bin/test-website +env GN2_PROFILE=~/opt/gn-latest ./bin/genenetwork2 ./etc/default_settings.py -c ../test/requests/test-website.py -a http://localhost:5003 ``` +The GN2_PROFILE is the Guix profile that contains all +dependencies. The ./bin/genenetwork2 script sets up the environment +and executes test-website.py in a Python interpreter. The -a switch +says to run all tests and the URL points to the running GN2 http +server. + ## Documentation User documentation can be found diff --git a/bin/test-website b/bin/test-website index 5935f016..7fbcfd2f 100755 --- a/bin/test-website +++ b/bin/test-website @@ -2,6 +2,6 @@ if [ -z $GN2_PROFILE ]; then echo "Run request tests with something like" - echo env GN2_PROFILE=/home/wrk/opt/gn-latest ./bin/genenetwork2 ./etc/default_settings.py -c ../test/requests/test-website.py http://localhost:5003 + echo env GN2_PROFILE=/home/wrk/opt/gn-latest ./bin/genenetwork2 ./etc/default_settings.py -c ../test/requests/test-website.py -a http://localhost:5003 exit 1 fi diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index 7d2ee8c9..4d4a9844 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -1,3 +1,44 @@ +# Elasticsearch support +# +# Some helpful commands to view the database: +# +# You can test the server being up with +# +# curl -H 'Content-Type: application/json' http://localhost:9200 +# +# List all indices +# +# curl -H 'Content-Type: application/json' 'localhost:9200/_cat/indices?v' +# +# To see the users index 'table' +# +# curl http://localhost:9200/users +# +# To list all user ids +# +# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d ' +# { +# "query" : { +# "match_all" : {} +# }, +# "stored_fields": [] +# }' +# +# To view a record +# +# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d ' +# { +# "query" : { +# "match" : { "email_address": "pjotr2017@thebird.nl"} +# } +# }' +# +# +# To delete the users index and data (dangerous!) +# +# curl -XDELETE -H 'Content-Type: application/json' 'localhost:9200/users' + + from elasticsearch import Elasticsearch, TransportError import logging -- cgit v1.2.3 From bc1672f8617c56684ae3aeda7018362e818c46d6 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 16 Apr 2018 17:25:14 +0300 Subject: Update mappings for Elasticsearch 6.2. Update logger * Update the indexes mappings to be compatible with the newer Elasticsearch 6.2.* series. Close the index before updating it, and reopen it after to help with the re-indexing of the data. * Update the error logger to include the exception that was thrown. --- wqflask/utility/elasticsearch_tools.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'wqflask/utility/elasticsearch_tools.py') diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index 7d2ee8c9..0dc59d43 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -29,8 +29,8 @@ def get_elasticsearch_connection(): es_logger = logging.getLogger("elasticsearch") es_logger.setLevel(logging.INFO) es_logger.addHandler(logging.NullHandler()) - except: - logger.error("Failed to get elasticsearch connection") + except Exception as e: + logger.error("Failed to get elasticsearch connection", e) es = None return es @@ -40,11 +40,12 @@ def setup_users_index(es_connection): index_settings = { "properties": { "email_address": { - "type": "string" - , "index": "not_analyzed"}}} + "type": "keyword"}}} es_connection.indices.create(index='users', ignore=400) + es_connection.indices.close(index="users") es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local") + es_connection.indices.open(index="users") def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type) -- cgit v1.2.3 From fcc43dd4008692b27935d90fcfd134d6c5d9495e Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 16 Apr 2018 18:46:29 +0300 Subject: Remove statements that might be causing issues * I can't swear on this, but it seems the presence of these statements was causing elasticsearch to act weird. --- wqflask/utility/elasticsearch_tools.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'wqflask/utility/elasticsearch_tools.py') diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index 76dcaebf..cce210c3 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -84,9 +84,7 @@ def setup_users_index(es_connection): "type": "keyword"}}} es_connection.indices.create(index='users', ignore=400) - es_connection.indices.close(index="users") es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local") - es_connection.indices.open(index="users") def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type) -- cgit v1.2.3 From 67e8f12e103f48329d8b3e38125c0e84b9dc089d Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 17 May 2018 16:32:44 +0000 Subject: Added script to quantile normalize a data set and enter its normalized sample data into ElasticSearch Added option to replace trait page sample/strain values with normalized ones Began editing Lei's scatterplot code Changed elasticsearch_tools' get_elasticsearch_connection so that it can also be used for purposes other than user authentication (by adding a "for_user" parameter) --- wqflask/base/anon_collection.py | 22 - wqflask/base/trait_collection.py | 53 --- wqflask/maintenance/quantile_normalize.py | 129 ++++++ wqflask/utility/elasticsearch_tools.py | 5 +- wqflask/wqflask/correlation/corr_scatter_plot.py | 53 +-- wqflask/wqflask/show_trait/SampleList.py | 77 ++-- wqflask/wqflask/show_trait/show_trait.py | 4 - .../wqflask/static/new/css/corr_scatter_plot.css | 40 +- .../static/new/javascript/draw_corr_scatterplot.js | 71 ++- .../wqflask/static/new/javascript/show_trait.js | 13 + .../new/javascript/show_trait_mapping_tools.js | 2 +- wqflask/wqflask/templates/corr_scatterplot.html | 476 +++++++++++---------- .../wqflask/templates/show_trait_edit_data.html | 6 +- 13 files changed, 567 insertions(+), 384 deletions(-) delete mode 100644 wqflask/base/anon_collection.py delete mode 100644 wqflask/base/trait_collection.py create mode 100644 wqflask/maintenance/quantile_normalize.py (limited to 'wqflask/utility/elasticsearch_tools.py') diff --git a/wqflask/base/anon_collection.py b/wqflask/base/anon_collection.py deleted file mode 100644 index dd1aa27f..00000000 --- a/wqflask/base/anon_collection.py +++ /dev/null @@ -1,22 +0,0 @@ -class AnonCollection(TraitCollection): - - def __init__(self, anon_id): - self.anon_id = anon_id - self.collection_members = Redis.smembers(self.anon_id) - print("self.collection_members is:", self.collection_members) - self.num_members = len(self.collection_members) - - - @app.route("/collections/remove", methods=('POST',)) - def remove_traits(traits_to_remove): - print("traits_to_remove:", traits_to_remove) - for trait in traits_to_remove: - Redis.srem(self.anon_id, trait) - - members_now = self.collection_members - traits_to_remove - print("members_now:", members_now) - print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now))) - - # We need to return something so we'll return this...maybe in the future - # we can use it to check the results - return str(len(members_now)) diff --git a/wqflask/base/trait_collection.py b/wqflask/base/trait_collection.py deleted file mode 100644 index d388a3af..00000000 --- a/wqflask/base/trait_collection.py +++ /dev/null @@ -1,53 +0,0 @@ -class TraitCollection(object): - - def __init__(self, is_anon=False): - self.is_anon = is_anon - - - @app.route("/collections/remove", methods=('POST',)) - def remove_traits(): - if is_anon: - AnonCollection.remove_traits() - else: - UserCollection.remove_traits() - - params = request.form - print("params are:", params) - uc_id = params['uc_id'] - uc = model.UserCollection.query.get(uc_id) - traits_to_remove = params.getlist('traits[]') - print("traits_to_remove are:", traits_to_remove) - traits_to_remove = process_traits(traits_to_remove) - print("\n\n after processing, traits_to_remove:", traits_to_remove) - all_traits = uc.members_as_set() - print(" all_traits:", all_traits) - members_now = all_traits - traits_to_remove - print(" members_now:", members_now) - print("Went from {} to {} members in set.".format(len(all_traits), len(members_now))) - uc.members = json.dumps(list(members_now)) - uc.changed_timestamp = datetime.datetime.utcnow() - db_session.commit() - - # We need to return something so we'll return this...maybe in the future - # we can use it to check the results - return str(len(members_now)) - - def __init__(self, anon_id) - self.anon_key = anon_key - self.collection_members = Redis.smembers(self.anon_id) - print("self.collection_members is:", self.collection_members) - self.num_members = len(self.collection_members) - - - @app.route("/collections/remove", methods=('POST',)) - def remove_traits(traits_to_remove): - print("traits_to_remove:", traits_to_remove) - for trait in traits_to_remove: - Redis.srem(self.anon_id, trait) - members_now = self.collection_members - traits_to_remove - print("members_now:", members_now) - print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now))) - - # We need to return something so we'll return this...maybe in the future - # we can use it to check the results - return str(len(members_now)) diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py new file mode 100644 index 00000000..c11073fb --- /dev/null +++ b/wqflask/maintenance/quantile_normalize.py @@ -0,0 +1,129 @@ +from __future__ import absolute_import, print_function, division + +import sys +sys.path.insert(0,'./') + +from itertools import izip + +import MySQLdb +import urlparse + +import numpy as np +import pandas as pd +from elasticsearch import Elasticsearch, TransportError +from elasticsearch.helpers import bulk + +from flask import Flask, g, request + +from wqflask import app +from utility.elasticsearch_tools import get_elasticsearch_connection +from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI + +def parse_db_uri(): + """Converts a database URI to the db name, host name, user name, and password""" + + parsed_uri = urlparse.urlparse(SQL_URI) + + db_conn_info = dict( + db = parsed_uri.path[1:], + host = parsed_uri.hostname, + user = parsed_uri.username, + passwd = parsed_uri.password) + + print(db_conn_info) + return db_conn_info + +def create_dataframe(input_file): + with open(input_file) as f: + ncols = len(f.readline().split("\t")) + + input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=range(1, ncols)) + return pd.DataFrame(input_array) + +#This function taken from https://github.com/ShawnLYU/Quantile_Normalize +def quantileNormalize(df_input): + df = df_input.copy() + #compute rank + dic = {} + for col in df: + dic.update({col : sorted(df[col])}) + sorted_df = pd.DataFrame(dic) + rank = sorted_df.mean(axis = 1).tolist() + #sort + for col in df: + t = np.searchsorted(np.sort(df[col]), df[col]) + df[col] = [rank[i] for i in t] + return df + +def set_data(dataset_name): + orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" + + sample_list = [] + with open(orig_file, 'r') as orig_fh, open('quant_norm.csv', 'r') as quant_fh: + for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)): + trait_dict = {} + sample_list = [] + if i == 0: + sample_names = line1.split('\t')[1:] + else: + trait_name = line1.split('\t')[0] + for i, sample in enumerate(sample_names): + this_sample = { + "name": sample, + "value": line1.split('\t')[i+1], + "qnorm": line2.split('\t')[i+1] + } + sample_list.append(this_sample) + query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName + FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet + WHERE Species.Id = InbredSet.SpeciesId and + InbredSet.Id = ProbeFreeze.InbredSetId and + ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and + ProbeSetFreeze.Name = '%s' and + ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and + ProbeSetXRef.ProbeSetId = ProbeSet.Id and + ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0]) + Cursor.execute(query) + result_info = Cursor.fetchone() + + yield { + "_index": "traits", + "_type": "trait", + "_source": { + "name": trait_name, + "species": result_info[0], + "group": result_info[1], + "dataset": dataset_name, + "dataset_fullname": result_info[2], + "samples": sample_list, + "transform_types": "qnorm" + } + } + +if __name__ == '__main__': + Conn = MySQLdb.Connect(**parse_db_uri()) + Cursor = Conn.cursor() + + #es = Elasticsearch([{ + # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT + #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None + + es = get_elasticsearch_connection(for_user=False) + + #input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" + #input_df = create_dataframe(input_filename) + #output_df = quantileNormalize(input_df) + + #output_df.to_csv('quant_norm.csv', sep='\t') + + #out_filename = sys.argv[1][:-4] + '_quantnorm.txt' + + #success, _ = bulk(es, set_data(sys.argv[1])) + + response = es.search( + index = "traits", doc_type = "trait", body = { + "query": { "match": { "name": "ENSMUSG00000028982" } } + } + ) + + print(response) \ No newline at end of file diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py index cce210c3..293a9ae6 100644 --- a/wqflask/utility/elasticsearch_tools.py +++ b/wqflask/utility/elasticsearch_tools.py @@ -52,7 +52,7 @@ def test_elasticsearch_connection(): if not es.ping(): logger.warning("Elasticsearch is DOWN") -def get_elasticsearch_connection(): +def get_elasticsearch_connection(for_user=True): """Return a connection to ES. Returns None on failure""" logger.info("get_elasticsearch_connection") es = None @@ -65,7 +65,8 @@ def get_elasticsearch_connection(): "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT }]) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None - setup_users_index(es) + if for_user: + setup_users_index(es) es_logger = logging.getLogger("elasticsearch") es_logger.setLevel(logging.INFO) diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py index 94711c67..831baf7e 100644 --- a/wqflask/wqflask/correlation/corr_scatter_plot.py +++ b/wqflask/wqflask/correlation/corr_scatter_plot.py @@ -6,44 +6,19 @@ from utility import corr_result_helpers from scipy import stats import numpy as np +import utility.logger +logger = utility.logger.getLogger(__name__ ) + class CorrScatterPlot(object): """Page that displays a correlation scatterplot with a line fitted to it""" def __init__(self, params): self.data_set_1 = data_set.create_dataset(params['dataset_1']) self.data_set_2 = data_set.create_dataset(params['dataset_2']) + #self.data_set_3 = data_set.create_dataset(params['dataset_3']) self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1) self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2) - - try: - width = int(params['width']) - except: - width = 800 - - try: - height = int(params['height']) - except: - height = 600 - - try: - circle_color = params['circle_color'] - except: - circle_color = '#3D85C6' - - try: - circle_radius = int(params['circle_radius']) - except: - circle_radius = 5 - - try: - line_color = params['line_color'] - except: - line_color = '#FF0000' - - try: - line_width = int(params['line_width']) - except: - line_width = 1 + #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3) samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data) @@ -60,14 +35,18 @@ class CorrScatterPlot(object): x = np.array(vals_1) y = np.array(vals_2) - slope, intercept, r_value, p_value, _std_err = stats.linregress(x, y) + slope, intercept, r_value, p_value, std_err = stats.linregress(x, y) rx = stats.rankdata(x) ry = stats.rankdata(y) self.rdata = [] self.rdata.append(rx.tolist()) self.rdata.append(ry.tolist()) - srslope, srintercept, srr_value, srp_value, _srstd_err = stats.linregress(rx, ry) + srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry) + + #vals_3 = [] + #for sample in self.trait_3.data: + # vals_3.append(self.trait_3.data[sample].value) self.js_data = dict( data = self.data, @@ -89,13 +68,9 @@ class CorrScatterPlot(object): srslope = srslope, srintercept = srintercept, srr_value = srr_value, - srp_value = srp_value, + srp_value = srp_value - width = width, - height = height, - circle_color = circle_color, - circle_radius = circle_radius, - line_color = line_color, - line_width = line_width + #trait3 = self.trait_3.data, + #vals_3 = vals_3 ) self.jsdata = self.js_data diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index 6d84a960..78bb3b42 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -10,8 +10,12 @@ import numpy as np from scipy import stats from pprint import pformat as pf +import simplejson as json + import itertools +from utility.elasticsearch_tools import get_elasticsearch_connection + import utility.logger logger = utility.logger.getLogger(__name__ ) @@ -33,6 +37,8 @@ class SampleList(object): self.get_attributes() + self.sample_qnorm = get_transform_vals(self.dataset, this_trait) + if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet': self.get_extra_attribute_values() @@ -152,36 +158,47 @@ class SampleList(object): return any(sample.variance for sample in self.sample_list) -#def z_score(vals): -# vals_array = np.array(vals) -# mean = np.mean(vals_array) -# stdv = np.std(vals_array) -# -# z_scores = [] -# for val in vals_array: -# z_score = (val - mean)/stdv -# z_scores.append(z_score) -# -# -# -# return z_scores - - -#def z_score(row): -# L = [n for n in row if not np.isnan(n)] -# m = np.mean(L) -# s = np.std(L) -# zL = [1.0 * (n - m) / s for n in L] -# if len(L) == len(row): return zL -# # deal with nan -# retL = list() -# for n in row: -# if np.isnan(n): -# retL.append(nan) -# else: -# retL.append(zL.pop(0)) -# assert len(zL) == 0 -# return retL +def get_transform_vals(dataset, trait): + es = get_elasticsearch_connection(for_user=False) + + logger.info("DATASET NAME:", dataset.name) + + query = '{"bool": {"must": [{"match": {"name": "%s"}}, {"match": {"dataset": "%s"}}]}}' % (trait.name, dataset.name) + + es_body = { + "query": { + "bool": { + "must": [ + { + "match": { + "name": "%s" % (trait.name) + } + }, + { + "match": { + "dataset": "%s" % (dataset.name) + } + } + ] + } + } + } + + response = es.search( index = "traits", doc_type = "trait", body = es_body ) + logger.info("THE RESPONSE:", response) + results = response['hits']['hits'] + + if len(results) > 0: + samples = results[0]['_source']['samples'] + + sample_dict = {} + for sample in samples: + sample_dict[sample['name']] = sample['qnorm'] + + logger.info("SAMPLE DICT:", sample_dict) + return sample_dict + else: + return None def natural_sort_key(x): """Get expected results when using as a key for sort - ints or strings are sorted properly""" diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 8b801396..d6d83c02 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -26,9 +26,6 @@ from db import webqtlDatabaseFunction from pprint import pformat as pf -from utility.tools import flat_files, flat_file_exists -from utility.tools import get_setting - from utility.logger import getLogger logger = getLogger(__name__ ) @@ -306,7 +303,6 @@ def get_nearest_marker(this_trait, this_db): #return "", "" else: return result[0][0] - #return result[0][0], result[1][0] def get_genofiles(this_dataset): jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, this_dataset.group.name) diff --git a/wqflask/wqflask/static/new/css/corr_scatter_plot.css b/wqflask/wqflask/static/new/css/corr_scatter_plot.css index c62d4c9a..a2ebb252 100644 --- a/wqflask/wqflask/static/new/css/corr_scatter_plot.css +++ b/wqflask/wqflask/static/new/css/corr_scatter_plot.css @@ -1,13 +1,41 @@ -.chart { +.nvd3 .nv-axis.nv-x text { + font-size: 16px; + font-weight: normal; + fill: black; +} +.nvd3 .nv-axis.nv-y text { + font-size: 16px; + font-weight: normal; + fill: black; } -.main text { - font: 10px sans-serif; +.nv-x .nv-axis g path.domain { + stroke: black; + stroke-width: 2; } -.axis line, .axis path { - shape-rendering: crispEdges; +.nv-y .nv-axis g path.domain { stroke: black; - fill: none; + stroke-width: 2; +} + +.nvd3 .nv-axis.nv-x path.domain { + stroke-opacity: 1; +} + +.nvd3 .nv-axis.nv-y path.domain { + stroke-opacity: 1; +} + +line.nv-regLine { + stroke: red; + stroke-width: 1; +} + +.nv-axisMin-x, +.nv-axisMax-x, +.nv-axisMin-y, +.nv-axisMax-y { + display: none; } diff --git a/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js b/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js index cfde6f09..c290cdfe 100644 --- a/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js +++ b/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js @@ -1,3 +1,5 @@ +// http://gn2-lei.genenetwork.org/corr_scatter_plot2?dataset_1=HC_M2_0606_P&dataset_2=HC_M2_0606_P&dataset_3=HC_M2_0606_P&trait_1=1427571_at&trait_2=1457022_at&trait_3=1427571_at + var chart; var srchart; @@ -7,7 +9,7 @@ function drawg() { // chart.showLegend(false); chart.duration(300); - chart.color(d3.scale.category10().range()); + //chart.color(d3.scale.category10().range()); chart.pointRange([0, 400]); chart.pointDomain([0, 10]); // @@ -74,18 +76,47 @@ function getdata(size, shape) { slope: js_data.slope, intercept: js_data.intercept }); + + sizemin = 1; + sizemax = 50; + if ('vals_3' in js_data) { + datamin = d3.min(js_data.vals_3); + datamax = d3.max(js_data.vals_3); + colormin = $("#cocolorfrom").val(); + colormax = $("#cocolorto").val(); + compute = d3.interpolate(colormin, colormax); + linear = d3.scale.linear().domain([datamin, datamax]).range([0,1]); + } + for (j = 0; j < js_data.data[0].length; j++) { + if ('trait3' in js_data) { + if (js_data.indIDs[j] in js_data.trait3) { + datav = js_data.trait3[js_data.indIDs[j]].value; + // size = (sizemax - sizemin) * (datav - datamin) / (datamax - datamin) + sizemin; + sizev = map1to2(datamin, datamax, sizemin, sizemax, datav); + } + } else { + datav = 0; + sizev = sizemin; + } data[0].values.push({ x: js_data.data[0][j], y: js_data.data[1][j], name: js_data.indIDs[j], - size: size, - shape: shape + size: sizev, + shape: shape, + v3: datav }); } + console.log(data); return data; } +function map1to2 (min1, max1, min2, max2, v1) { + v2 = (v1 - min1) * (max2 - min2) / (max1 - min1) + min2; + return v2; +} + function srgetdata(size, shape) { var data = []; data.push({ @@ -94,6 +125,12 @@ function srgetdata(size, shape) { intercept: js_data.srintercept }); for (j = 0; j < js_data.rdata[0].length; j++) { + if (js_data.indIDs[j] in js_data.trait3) { + size = js_data.trait3[js_data.indIDs[j]].value; + //console.log("yes "+js_data.indIDs[j]+", "+size); + } else { + //console.log("no "+js_data.indIDs[j]); + } data[0].values.push({ x: js_data.rdata[0][j], y: js_data.rdata[1][j], @@ -163,13 +200,39 @@ function chartupdatewh() { window.dispatchEvent(new Event('resize')); } + function colorer(d) { + datamin = d3.min(js_data.vals_3); + datamax = d3.max(js_data.vals_3); + //colormin = d3.rgb(255,0,0); + //colormax = d3.rgb(0,255,0); + colormin = $("#cocolorfrom").val(); + colormax = $("#cocolorto").val(); + + console.log("colormin: "+colormin); + console.log("colormax: "+colormax); + + compute = d3.interpolate(colormin, colormax); + linear = d3.scale.linear().domain([datamin, datamax]).range([0,1]); + //console.log(d[0].x); + c= compute(linear(d[0].x)); + //console.log(c); + return c; + } + function chartupdatedata() { // var size = $("#marksize").val(); var shape = $("#markshape").val(); // - d3.select('#scatterplot2 svg').datum(nv.log(getdata(size, shape))).call(chart); + d3.select('#scatterplot2 svg').datum(getdata(size, shape)).call(chart); d3.select('#srscatterplot2 svg').datum(nv.log(srgetdata(size, shape))).call(srchart); + // + d3.selectAll('.nv-point') + .attr({ + 'stroke': colorer, + 'fill': colorer + }); + // nv.utils.windowResize(chart.update); nv.utils.windowResize(srchart.update); } diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index dfdafaf0..17afc814 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -559,6 +559,18 @@ })(this)); }; $('#reset').click(reset_samples_table); + switch_qnorm_data = function() { + return $('.trait_value_input').each((function(_this) { + return function(_index, element) { + transform_val = $(element).data('transform') + if (transform_val != "") { + $(element).val(transform_val.toFixed(3)); + } + return transform_val + }; + })(this)); + }; + $('#qnorm').click(switch_qnorm_data); get_sample_table_data = function(table_name) { var samples; samples = []; @@ -871,6 +883,7 @@ $('#exclude_group').click(edit_data_change); $('#block_outliers').click(edit_data_change); $('#reset').click(edit_data_change); + $('#qnorm').click(edit_data_change); return console.log("end"); }); diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js index 81123de7..daa5b3f2 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js +++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js @@ -213,7 +213,7 @@ var form_data, url; console.log("RUNNING GEMMA"); url = "/loading"; - $('input[name=method]').val("gemma_bimbam"); + $('input[name=method]').val("gemma"); $('input[name=num_perm]').val(0); $('input[name=genofile]').val($('#genofile_gemma').val()); $('input[name=maf]').val($('input[name=maf_gemma]').val()); diff --git a/wqflask/wqflask/templates/corr_scatterplot.html b/wqflask/wqflask/templates/corr_scatterplot.html index 87588534..e0f017c2 100644 --- a/wqflask/wqflask/templates/corr_scatterplot.html +++ b/wqflask/wqflask/templates/corr_scatterplot.html @@ -1,222 +1,254 @@ -{% extends "base.html" %} - -{% block css %} - - - - - - - -{% endblock %} - -{% block content %} - -
- -

Correlation Scatterplot

- -
- - {% if trait_1.dataset.type == "ProbeSet" %} -
- X axis: - - {{trait_1.dataset.group.species + " " + trait_1.dataset.group.name + " " + trait_1.dataset.tissue + " " + trait_1.dataset.name + ": " + trait_1.name|string}} - -
-
- [{{trait_1.symbol}} on {{trait_1.location_repr}} Mb] - {{trait_1.description_display}} -
- {% elif trait_1.dataset.type == "Publish" %} -
- X axis: - - {{trait_1.dataset.group.species + " " + trait_1.dataset.group.name + " " + trait_1.dataset.name + ": " + trait_1.name|string}} - -
-
- PubMed: {{trait_1.pubmed_text}} - {{trait_1.description_display}} -
- {% endif %} - -
- - {% if trait_2.dataset.type == "ProbeSet" %} -
- Y axis: - - {{trait_2.dataset.group.species + " " + trait_2.dataset.group.name + " " + trait_2.dataset.tissue + " " + trait_2.dataset.name + ": " + trait_2.name|string}} - -
-
- [{{trait_2.symbol}} on {{trait_2.location_repr}} Mb] - {{trait_2.description_display}} -
- {% elif trait_2.dataset.type == "Publish" %} -
- Y axis: - - {{trait_2.dataset.group.species + " " + trait_2.dataset.group.name + " " + trait_2.dataset.name + ": " + trait_2.name|string}} - -
-
- PubMed: {{trait_2.pubmed_text}} - {{trait_2.description_display}} -
- {% endif %} - -
- -
- - - - - - -
Width pxHeight px
- - - - - -
Mark - - - -
- - - - - - - - -
Label - - px - Number - - px - Axis - - px - Line - - px -
- -
- - - -
-
- -
- -
Save as SVG
- -

Pearson Correlation Scatterplot

- -
- -
- - - - - - - - - - - - - - - -
StatisticValue
Number {{jsdata.num_overlap}}
Slope {{'%0.3f' % jsdata.slope}}
Intercept {{'%0.3f' % jsdata.intercept}}
r value {{'%0.3f' % jsdata.r_value}}
P value {% if jsdata.p_value < 0.001 %}{{'%0.3e' % jsdata.p_value}}{% else %}{{'%0.3f' % jsdata.p_value}}{% endif %}
- Regression Line -
- y = {{'%0.3f' % jsdata.slope}} * x + {{'%0.3f' % jsdata.intercept}} -
- -
- -
- -
- -
Save as SVG
- -

Spearman Rank Correlation Scatterplot

- -
- -
- - - - - - - - - - - - -
StatisticValue
Number {{jsdata.num_overlap}}
Slope {{'%0.3f' % jsdata.srslope}}
Intercept {{'%0.3f' % jsdata.srintercept}}
r value {{'%0.3f' % jsdata.srr_value}}
P value {% if jsdata.srp_value < 0.001 %}{{'%0.3e' % jsdata.srp_value}}{% else %}{{'%0.3f' % jsdata.srp_value}}{% endif %}
- -
- -
- -
- -{% endblock %} - -{% block js %} - - - - - - - - -{% endblock %} +{% extends "base.html" %} + +{% block css %} + + + + + +{% endblock %} + +{% block content %} + +
+ +

Correlation Scatterplot

+ +
+ {% if trait_1.dataset.type == "ProbeSet" %} +
+ X axis: + + {{trait_1.dataset.group.species + " " + trait_1.dataset.group.name + " " + trait_1.dataset.tissue + " " + trait_1.dataset.name + ": " + trait_1.name|string}} + +
+
+ [{{trait_1.symbol}} on {{trait_1.location_repr}} Mb] + {{trait_1.description_display}} +
+ {% elif trait_1.dataset.type == "Publish" %} +
+ X axis: + + {{trait_1.dataset.group.species + " " + trait_1.dataset.group.name + " " + trait_1.dataset.name + ": " + trait_1.name|string}} + +
+
+ PubMed: {{trait_1.pubmed_text}} + {{trait_1.description_display}} +
+ {% endif %} + +
+ + {% if trait_2.dataset.type == "ProbeSet" %} +
+ Y axis: + + {{trait_2.dataset.group.species + " " + trait_2.dataset.group.name + " " + trait_2.dataset.tissue + " " + trait_2.dataset.name + ": " + trait_2.name|string}} + +
+
+ [{{trait_2.symbol}} on {{trait_2.location_repr}} Mb] + {{trait_2.description_display}} +
+ {% elif trait_2.dataset.type == "Publish" %} +
+ Y axis: + + {{trait_2.dataset.group.species + " " + trait_2.dataset.group.name + " " + trait_2.dataset.name + ": " + trait_2.name|string}} + +
+
+ PubMed: {{trait_2.pubmed_text}} + {{trait_2.description_display}} +
+ {% endif %} + +
+ + {% if trait_3 %} + {% if trait_3.dataset.type == "ProbeSet" %} +
+ Cofactor: + + {{trait_3.dataset.group.species + " " + trait_3.dataset.group.name + " " + trait_3.dataset.tissue + " " + trait_3.dataset.name + ": " + trait_3.name|string}} + +
+
+ [{{trait_3.symbol}} on {{trait_3.location_repr}} Mb] + {{trait_3.description_display}} +
+ {% elif trait_3.dataset.type == "Publish" %} +
+ Cofactor: + + {{trait_3.dataset.group.species + " " + trait_3.dataset.group.name + " " + trait_3.dataset.name + ": " + trait_3.name|string}} + +
+
+ PubMed: {{trait_3.pubmed_text}} + {{trait_3.description_display}} +
+ {% endif %} + {% endif %} + + + + +
+ +
+ + + + + + +
Width pxHeight px
+ + + + + + + + +
Label + + px + Number + + px + Axis + + px + Line + + px +
+ +
+ + + +
+ +
+
+
+ Save as SVG +
+
+

Pearson Correlation Scatterplot

+
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
StatisticValue
Number{{jsdata.num_overlap}}
Slope{{'%0.3f' % jsdata.slope}}
Intercept{{'%0.3f' % jsdata.intercept}}
r value{{'%0.3f' % jsdata.r_value}}
P value{% if jsdata.p_value < 0.001 %}{{'%0.3e' % jsdata.p_value}}{% else %}{{'%0.3f' % jsdata.p_value}}{% endif %}
+ Regression Line +
+ y = {{'%0.3f' % jsdata.slope}} * x + {{'%0.3f' % jsdata.intercept}} +
+
+ +
+ +
+ +
+ Save as SVG +
+
+

Spearman Rank Correlation Scatterplot

+
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
StatisticValue
Number{{jsdata.num_overlap}}
Slope{{'%0.3f' % jsdata.srslope}}
Intercept{{'%0.3f' % jsdata.srintercept}}
r value{{'%0.3f' % jsdata.srr_value}}
P value{% if jsdata.srp_value < 0.001 %}{{'%0.3e' % jsdata.srp_value}}{% else %}{{'%0.3f' % jsdata.srp_value}}{% endif %}
+
+ +
+ +
+ +{% endblock %} + +{% block js %} + + + + + + + + +{% endblock %} diff --git a/wqflask/wqflask/templates/show_trait_edit_data.html b/wqflask/wqflask/templates/show_trait_edit_data.html index a431821e..1402db47 100644 --- a/wqflask/wqflask/templates/show_trait_edit_data.html +++ b/wqflask/wqflask/templates/show_trait_edit_data.html @@ -54,6 +54,10 @@ +
+ {% if sample_groups[0].sample_qnorm is not none %} + + {% endif %}
@@ -111,7 +115,7 @@ {# Todo: Add IDs #} - previous_chr: previous_chr = marker['chr'] elif marker['chr'] < previous_chr: diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index 78bb3b42..8dbba530 100644 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -37,7 +37,7 @@ class SampleList(object): self.get_attributes() - self.sample_qnorm = get_transform_vals(self.dataset, this_trait) + #self.sample_qnorm = get_transform_vals(self.dataset, this_trait) if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet': self.get_extra_attribute_values() diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index d6d83c02..7b952af4 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -12,6 +12,8 @@ from collections import OrderedDict import redis Redis = redis.StrictRedis() +import scipy.stats as ss + from flask import Flask, g from htmlgen import HTMLgen2 as HT @@ -137,6 +139,8 @@ class ShowTrait(object): self.make_sample_lists() + self.qnorm_vals = quantile_normalize_vals(self.sample_groups) + # Todo: Add back in the ones we actually need from below, as we discover we need them hddn = OrderedDict() @@ -281,6 +285,44 @@ class ShowTrait(object): self.sample_groups = (primary_samples,) self.dataset.group.allsamples = all_samples_ordered +def quantile_normalize_vals(sample_groups): + def normf(trait_vals): + ranked_vals = ss.rankdata(trait_vals) + p_list = [] + for i, val in enumerate(trait_vals): + p_list.append(((i+1) - 0.5)/len(trait_vals)) + + z = ss.norm.ppf(p_list) + normed_vals = [] + for rank in ranked_vals: + normed_vals.append("%0.3f" % z[int(rank)-1]) + + return normed_vals + + qnorm_by_group = [] + for sample_type in sample_groups: + trait_vals = [] + for sample in sample_type.sample_list: + try: + trait_vals.append(float(sample.value)) + except: + continue + + qnorm_vals = normf(trait_vals) + + qnorm_vals_with_x = [] + counter = 0 + for sample in sample_type.sample_list: + if sample.display_value == "x": + qnorm_vals_with_x.append("x") + else: + qnorm_vals_with_x.append(qnorm_vals[counter]) + counter += 1 + + qnorm_by_group.append(qnorm_vals_with_x) + + return qnorm_by_group + def get_nearest_marker(this_trait, this_db): this_chr = this_trait.locus_chr logger.debug("this_chr:", this_chr) diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index 17afc814..5e2ecc33 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -549,6 +549,7 @@ }; $('#block_outliers').click(block_outliers); reset_samples_table = function() { + $('input[name="transform"]').val(""); return $('.trait_value_input').each((function(_this) { return function(_index, element) { console.log("value is:", $(element).val()); @@ -559,6 +560,52 @@ })(this)); }; $('#reset').click(reset_samples_table); + + log_normalize_data = function() { + return $('.trait_value_input').each((function(_this) { + return function(_index, element) { + current_value = $(element).data("value"); + if(isNaN(current_value)) { + return current_value + } else { + $(element).val(Math.log2(current_value).toFixed(3)); + return Math.log2(current_value).toFixed(3) + } + }; + })(this)); + }; + + qnorm_data = function() { + return $('.trait_value_input').each((function(_this) { + return function(_index, element) { + current_value = $(element).data("value"); + if(isNaN(current_value)) { + return current_value + } else { + $(element).val($(element).data("qnorm")); + return $(element).data("qnorm"); + } + }; + })(this)); + }; + + normalize_data = function() { + if ($('#norm_method option:selected').val() == 'log2'){ + if ($('input[name="transform"]').val() != "log2") { + log_normalize_data() + $('input[name="transform"]').val("log2") + } + } + else if ($('#norm_method option:selected').val() == 'qnorm'){ + if ($('input[name="transform"]').val() != "qnorm") { + qnorm_data() + $('input[name="transform"]').val("qnorm") + } + } + } + + $('#normalize').click(normalize_data); + switch_qnorm_data = function() { return $('.trait_value_input').each((function(_this) { return function(_index, element) { @@ -734,7 +781,7 @@ box_data = [trace1, trace2, trace3] } else { var box_layout = { - width: 500, + width: 300, height: 500, margin: { l: 50, @@ -834,7 +881,7 @@ var layout = { yaxis: { - range: [range_bottom, range_top] + range: [range_bottom, range_top], }, width: 1200, height: 500, @@ -884,6 +931,7 @@ $('#block_outliers').click(edit_data_change); $('#reset').click(edit_data_change); $('#qnorm').click(edit_data_change); + $('#normalize').click(edit_data_change); return console.log("end"); }); diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html index 2a5556ea..dba3e266 100755 --- a/wqflask/wqflask/templates/index_page_orig.html +++ b/wqflask/wqflask/templates/index_page_orig.html @@ -34,11 +34,11 @@
-
-
- +
+
+
-
+
@@ -46,9 +46,9 @@
-
-
- +
+
+
@@ -56,21 +56,21 @@
-
-
- +
+
+
-
-
- +
+
+
-
+
@@ -85,8 +85,8 @@
-
-
+
+
@@ -95,8 +95,8 @@
-
-
+
+
Enter terms, genes, ID numbers in the Search field.
Use * or ? wildcards (Cyp*a?, synap*).
Use quotes for terms such as "tyrosine kinase". @@ -106,8 +106,8 @@
-
-
+
+
@@ -115,8 +115,8 @@
-
-
+
+
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 4aad4242..f67fff90 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -35,6 +35,7 @@ +
diff --git a/wqflask/wqflask/templates/show_trait_edit_data.html b/wqflask/wqflask/templates/show_trait_edit_data.html index 1402db47..482d1d88 100644 --- a/wqflask/wqflask/templates/show_trait_edit_data.html +++ b/wqflask/wqflask/templates/show_trait_edit_data.html @@ -9,7 +9,7 @@ needed.

-
+
{% for attribute in sample_groups[0].attributes %} @@ -42,7 +39,6 @@
{% endif %} -
@@ -55,9 +51,18 @@
+
+ + +
+

@@ -79,6 +84,7 @@ {% for sample_type in sample_groups %} + {% set outer_loop = loop %}

{{ sample_type.header }}


@@ -115,7 +121,7 @@ {# Todo: Add IDs #} -
- {% if g.user_session.logged_in and (g.user_session.num_collections > 0) %} + {% if g.user_session.logged_in %} + {% if g.user_session.num_collections < 1 %} No collections available. Please add traits to a collection to use them as covariates. + {% endif %} {% elif g.cookie_session.display_num_collections() == "" %} No collections available. Please add traits to a collection to use them as covariates. {% else %} diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index 5d388d66..830c7864 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -67,7 +67,10 @@ class AnonUser(object): # but wouldn't set cookies for staging and my branch. The new code (using @app.after_request) seems to work. @app.after_request def set_cookie(response): - response.set_cookie(self.cookie_name, self.cookie) + if self.cookie: + pass + else: + response.set_cookie(self.cookie_name, self.cookie) return response #@after.after_this_request -- cgit v1.2.3