From fdd28defcaf3326f3c6b6507124708d83a1da119 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Sun, 15 Apr 2018 11:57:09 +0300
Subject: Deactivate analysis of email_address field
* Prevent elasticsearch from analysing and tokenising the email_address
field so as to avoid the issue of getting back all email addresses with
the same domain as the one being searched for.
---
wqflask/utility/elasticsearch_tools.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
(limited to 'wqflask/utility/elasticsearch_tools.py')
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
index d35cb5ee..7d2ee8c9 100644
--- a/wqflask/utility/elasticsearch_tools.py
+++ b/wqflask/utility/elasticsearch_tools.py
@@ -24,6 +24,8 @@ def get_elasticsearch_connection():
"host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
}]) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
+ setup_users_index(es)
+
es_logger = logging.getLogger("elasticsearch")
es_logger.setLevel(logging.INFO)
es_logger.addHandler(logging.NullHandler())
@@ -33,6 +35,17 @@ def get_elasticsearch_connection():
return es
+def setup_users_index(es_connection):
+ if es_connection:
+ index_settings = {
+ "properties": {
+ "email_address": {
+ "type": "string"
+ , "index": "not_analyzed"}}}
+
+ es_connection.indices.create(index='users', ignore=400)
+ es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local")
+
def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"):
return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type)
--
cgit v1.2.3
From dda4697505aea2cd950533dfb3a0dfb0e66ec018 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 16 Apr 2018 09:00:52 +0000
Subject: Docs on elasticsearch use
---
README.md | 24 ++++++++++++++------
bin/test-website | 2 +-
wqflask/utility/elasticsearch_tools.py | 41 ++++++++++++++++++++++++++++++++++
3 files changed, 59 insertions(+), 8 deletions(-)
(limited to 'wqflask/utility/elasticsearch_tools.py')
diff --git a/README.md b/README.md
index 3e7e64d0..59645994 100644
--- a/README.md
+++ b/README.md
@@ -17,25 +17,35 @@ deploy GN2 and dependencies as a self contained unit on any machine.
The database can be run separately as well as the source tree (for
developers). See the [installation docs](doc/README.org).
-## Test
+## Run
Once installed GN2 can be run online through a browser interface
```sh
-./bin/genenetwork2
+genenetwork2
```
-(default is http://localhost:5003/). For more examples, including running scripts and a Python REPL
-see the startup script [./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/bin/genenetwork2).
+(default is http://localhost:5003/). For full examples (you'll need to
+set a number of environment variables), including running scripts and
+a Python REPL, see the startup script
+[./bin/genenetwork2](https://github.com/genenetwork/genenetwork2/blob/testing/bin/genenetwork2).
+## Testing
-We are building up automated
-testing using [mechanize](https://github.com/genenetwork/genenetwork2/tree/master/test/lib) which can be run with
+We are building 'Mechanical Rob' automated testing using Python
+[requests](https://github.com/genenetwork/genenetwork2/tree/master/test/lib)
+which can be run with something like
```sh
-./bin/test-website
+env GN2_PROFILE=~/opt/gn-latest ./bin/genenetwork2 ./etc/default_settings.py -c ../test/requests/test-website.py -a http://localhost:5003
```
+The GN2_PROFILE is the Guix profile that contains all
+dependencies. The ./bin/genenetwork2 script sets up the environment
+and executes test-website.py in a Python interpreter. The -a switch
+says to run all tests and the URL points to the running GN2 http
+server.
+
## Documentation
User documentation can be found
diff --git a/bin/test-website b/bin/test-website
index 5935f016..7fbcfd2f 100755
--- a/bin/test-website
+++ b/bin/test-website
@@ -2,6 +2,6 @@
if [ -z $GN2_PROFILE ]; then
echo "Run request tests with something like"
- echo env GN2_PROFILE=/home/wrk/opt/gn-latest ./bin/genenetwork2 ./etc/default_settings.py -c ../test/requests/test-website.py http://localhost:5003
+ echo env GN2_PROFILE=/home/wrk/opt/gn-latest ./bin/genenetwork2 ./etc/default_settings.py -c ../test/requests/test-website.py -a http://localhost:5003
exit 1
fi
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
index 7d2ee8c9..4d4a9844 100644
--- a/wqflask/utility/elasticsearch_tools.py
+++ b/wqflask/utility/elasticsearch_tools.py
@@ -1,3 +1,44 @@
+# Elasticsearch support
+#
+# Some helpful commands to view the database:
+#
+# You can test the server being up with
+#
+# curl -H 'Content-Type: application/json' http://localhost:9200
+#
+# List all indices
+#
+# curl -H 'Content-Type: application/json' 'localhost:9200/_cat/indices?v'
+#
+# To see the users index 'table'
+#
+# curl http://localhost:9200/users
+#
+# To list all user ids
+#
+# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d '
+# {
+# "query" : {
+# "match_all" : {}
+# },
+# "stored_fields": []
+# }'
+#
+# To view a record
+#
+# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d '
+# {
+# "query" : {
+# "match" : { "email_address": "pjotr2017@thebird.nl"}
+# }
+# }'
+#
+#
+# To delete the users index and data (dangerous!)
+#
+# curl -XDELETE -H 'Content-Type: application/json' 'localhost:9200/users'
+
+
from elasticsearch import Elasticsearch, TransportError
import logging
--
cgit v1.2.3
From bc1672f8617c56684ae3aeda7018362e818c46d6 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 16 Apr 2018 17:25:14 +0300
Subject: Update mappings for Elasticsearch 6.2. Update logger
* Update the index mappings to be compatible with the newer
Elasticsearch 6.2.* series.
Close the index before updating it, and reopen it after to help with
the re-indexing of the data.
* Update the error logger to include the exception that was thrown.
---
wqflask/utility/elasticsearch_tools.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
(limited to 'wqflask/utility/elasticsearch_tools.py')
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
index 7d2ee8c9..0dc59d43 100644
--- a/wqflask/utility/elasticsearch_tools.py
+++ b/wqflask/utility/elasticsearch_tools.py
@@ -29,8 +29,8 @@ def get_elasticsearch_connection():
es_logger = logging.getLogger("elasticsearch")
es_logger.setLevel(logging.INFO)
es_logger.addHandler(logging.NullHandler())
- except:
- logger.error("Failed to get elasticsearch connection")
+ except Exception as e:
+ logger.error("Failed to get elasticsearch connection", e)
es = None
return es
@@ -40,11 +40,12 @@ def setup_users_index(es_connection):
index_settings = {
"properties": {
"email_address": {
- "type": "string"
- , "index": "not_analyzed"}}}
+ "type": "keyword"}}}
es_connection.indices.create(index='users', ignore=400)
+ es_connection.indices.close(index="users")
es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local")
+ es_connection.indices.open(index="users")
def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"):
return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type)
--
cgit v1.2.3
From fcc43dd4008692b27935d90fcfd134d6c5d9495e Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Mon, 16 Apr 2018 18:46:29 +0300
Subject: Remove statements that might be causing issues
* This is not confirmed with certainty, but the presence of these
statements appeared to cause elasticsearch to behave erratically.
---
wqflask/utility/elasticsearch_tools.py | 2 --
1 file changed, 2 deletions(-)
(limited to 'wqflask/utility/elasticsearch_tools.py')
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
index 76dcaebf..cce210c3 100644
--- a/wqflask/utility/elasticsearch_tools.py
+++ b/wqflask/utility/elasticsearch_tools.py
@@ -84,9 +84,7 @@ def setup_users_index(es_connection):
"type": "keyword"}}}
es_connection.indices.create(index='users', ignore=400)
- es_connection.indices.close(index="users")
es_connection.indices.put_mapping(body=index_settings, index="users", doc_type="local")
- es_connection.indices.open(index="users")
def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"):
return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type)
--
cgit v1.2.3
From 67e8f12e103f48329d8b3e38125c0e84b9dc089d Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 17 May 2018 16:32:44 +0000
Subject: Added script to quantile normalize a data set and enter its
normalized sample data into ElasticSearch
Added option to replace trait page sample/strain values with normalized ones
Began editing Lei's scatterplot code
Changed elasticsearch_tools' get_elasticsearch_connection so that it can also be used for purposes other than user authentication (by adding a "for_user" parameter)
---
wqflask/base/anon_collection.py | 22 -
wqflask/base/trait_collection.py | 53 ---
wqflask/maintenance/quantile_normalize.py | 129 ++++++
wqflask/utility/elasticsearch_tools.py | 5 +-
wqflask/wqflask/correlation/corr_scatter_plot.py | 53 +--
wqflask/wqflask/show_trait/SampleList.py | 77 ++--
wqflask/wqflask/show_trait/show_trait.py | 4 -
.../wqflask/static/new/css/corr_scatter_plot.css | 40 +-
.../static/new/javascript/draw_corr_scatterplot.js | 71 ++-
.../wqflask/static/new/javascript/show_trait.js | 13 +
.../new/javascript/show_trait_mapping_tools.js | 2 +-
wqflask/wqflask/templates/corr_scatterplot.html | 476 +++++++++++----------
.../wqflask/templates/show_trait_edit_data.html | 6 +-
13 files changed, 567 insertions(+), 384 deletions(-)
delete mode 100644 wqflask/base/anon_collection.py
delete mode 100644 wqflask/base/trait_collection.py
create mode 100644 wqflask/maintenance/quantile_normalize.py
(limited to 'wqflask/utility/elasticsearch_tools.py')
diff --git a/wqflask/base/anon_collection.py b/wqflask/base/anon_collection.py
deleted file mode 100644
index dd1aa27f..00000000
--- a/wqflask/base/anon_collection.py
+++ /dev/null
@@ -1,22 +0,0 @@
-class AnonCollection(TraitCollection):
-
- def __init__(self, anon_id):
- self.anon_id = anon_id
- self.collection_members = Redis.smembers(self.anon_id)
- print("self.collection_members is:", self.collection_members)
- self.num_members = len(self.collection_members)
-
-
- @app.route("/collections/remove", methods=('POST',))
- def remove_traits(traits_to_remove):
- print("traits_to_remove:", traits_to_remove)
- for trait in traits_to_remove:
- Redis.srem(self.anon_id, trait)
-
- members_now = self.collection_members - traits_to_remove
- print("members_now:", members_now)
- print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
-
- # We need to return something so we'll return this...maybe in the future
- # we can use it to check the results
- return str(len(members_now))
diff --git a/wqflask/base/trait_collection.py b/wqflask/base/trait_collection.py
deleted file mode 100644
index d388a3af..00000000
--- a/wqflask/base/trait_collection.py
+++ /dev/null
@@ -1,53 +0,0 @@
-class TraitCollection(object):
-
- def __init__(self, is_anon=False):
- self.is_anon = is_anon
-
-
- @app.route("/collections/remove", methods=('POST',))
- def remove_traits():
- if is_anon:
- AnonCollection.remove_traits()
- else:
- UserCollection.remove_traits()
-
- params = request.form
- print("params are:", params)
- uc_id = params['uc_id']
- uc = model.UserCollection.query.get(uc_id)
- traits_to_remove = params.getlist('traits[]')
- print("traits_to_remove are:", traits_to_remove)
- traits_to_remove = process_traits(traits_to_remove)
- print("\n\n after processing, traits_to_remove:", traits_to_remove)
- all_traits = uc.members_as_set()
- print(" all_traits:", all_traits)
- members_now = all_traits - traits_to_remove
- print(" members_now:", members_now)
- print("Went from {} to {} members in set.".format(len(all_traits), len(members_now)))
- uc.members = json.dumps(list(members_now))
- uc.changed_timestamp = datetime.datetime.utcnow()
- db_session.commit()
-
- # We need to return something so we'll return this...maybe in the future
- # we can use it to check the results
- return str(len(members_now))
-
- def __init__(self, anon_id)
- self.anon_key = anon_key
- self.collection_members = Redis.smembers(self.anon_id)
- print("self.collection_members is:", self.collection_members)
- self.num_members = len(self.collection_members)
-
-
- @app.route("/collections/remove", methods=('POST',))
- def remove_traits(traits_to_remove):
- print("traits_to_remove:", traits_to_remove)
- for trait in traits_to_remove:
- Redis.srem(self.anon_id, trait)
- members_now = self.collection_members - traits_to_remove
- print("members_now:", members_now)
- print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
-
- # We need to return something so we'll return this...maybe in the future
- # we can use it to check the results
- return str(len(members_now))
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
new file mode 100644
index 00000000..c11073fb
--- /dev/null
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -0,0 +1,129 @@
+from __future__ import absolute_import, print_function, division
+
+import sys
+sys.path.insert(0,'./')
+
+from itertools import izip
+
+import MySQLdb
+import urlparse
+
+import numpy as np
+import pandas as pd
+from elasticsearch import Elasticsearch, TransportError
+from elasticsearch.helpers import bulk
+
+from flask import Flask, g, request
+
+from wqflask import app
+from utility.elasticsearch_tools import get_elasticsearch_connection
+from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI
+
+def parse_db_uri():
+ """Converts a database URI to the db name, host name, user name, and password"""
+
+ parsed_uri = urlparse.urlparse(SQL_URI)
+
+ db_conn_info = dict(
+ db = parsed_uri.path[1:],
+ host = parsed_uri.hostname,
+ user = parsed_uri.username,
+ passwd = parsed_uri.password)
+
+ print(db_conn_info)
+ return db_conn_info
+
+def create_dataframe(input_file):
+ with open(input_file) as f:
+ ncols = len(f.readline().split("\t"))
+
+ input_array = np.loadtxt(open(input_file, "rb"), delimiter="\t", skiprows=1, usecols=range(1, ncols))
+ return pd.DataFrame(input_array)
+
+#This function taken from https://github.com/ShawnLYU/Quantile_Normalize
+def quantileNormalize(df_input):
+ df = df_input.copy()
+ #compute rank
+ dic = {}
+ for col in df:
+ dic.update({col : sorted(df[col])})
+ sorted_df = pd.DataFrame(dic)
+ rank = sorted_df.mean(axis = 1).tolist()
+ #sort
+ for col in df:
+ t = np.searchsorted(np.sort(df[col]), df[col])
+ df[col] = [rank[i] for i in t]
+ return df
+
+def set_data(dataset_name):
+ orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
+
+ sample_list = []
+ with open(orig_file, 'r') as orig_fh, open('quant_norm.csv', 'r') as quant_fh:
+ for i, (line1, line2) in enumerate(izip(orig_fh, quant_fh)):
+ trait_dict = {}
+ sample_list = []
+ if i == 0:
+ sample_names = line1.split('\t')[1:]
+ else:
+ trait_name = line1.split('\t')[0]
+ for i, sample in enumerate(sample_names):
+ this_sample = {
+ "name": sample,
+ "value": line1.split('\t')[i+1],
+ "qnorm": line2.split('\t')[i+1]
+ }
+ sample_list.append(this_sample)
+ query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName
+ FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet
+ WHERE Species.Id = InbredSet.SpeciesId and
+ InbredSet.Id = ProbeFreeze.InbredSetId and
+ ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and
+ ProbeSetFreeze.Name = '%s' and
+ ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id and
+ ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0])
+ Cursor.execute(query)
+ result_info = Cursor.fetchone()
+
+ yield {
+ "_index": "traits",
+ "_type": "trait",
+ "_source": {
+ "name": trait_name,
+ "species": result_info[0],
+ "group": result_info[1],
+ "dataset": dataset_name,
+ "dataset_fullname": result_info[2],
+ "samples": sample_list,
+ "transform_types": "qnorm"
+ }
+ }
+
+if __name__ == '__main__':
+ Conn = MySQLdb.Connect(**parse_db_uri())
+ Cursor = Conn.cursor()
+
+ #es = Elasticsearch([{
+ # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
+ #}], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
+
+ es = get_elasticsearch_connection(for_user=False)
+
+ #input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
+ #input_df = create_dataframe(input_filename)
+ #output_df = quantileNormalize(input_df)
+
+ #output_df.to_csv('quant_norm.csv', sep='\t')
+
+ #out_filename = sys.argv[1][:-4] + '_quantnorm.txt'
+
+ #success, _ = bulk(es, set_data(sys.argv[1]))
+
+ response = es.search(
+ index = "traits", doc_type = "trait", body = {
+ "query": { "match": { "name": "ENSMUSG00000028982" } }
+ }
+ )
+
+ print(response)
\ No newline at end of file
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
index cce210c3..293a9ae6 100644
--- a/wqflask/utility/elasticsearch_tools.py
+++ b/wqflask/utility/elasticsearch_tools.py
@@ -52,7 +52,7 @@ def test_elasticsearch_connection():
if not es.ping():
logger.warning("Elasticsearch is DOWN")
-def get_elasticsearch_connection():
+def get_elasticsearch_connection(for_user=True):
"""Return a connection to ES. Returns None on failure"""
logger.info("get_elasticsearch_connection")
es = None
@@ -65,7 +65,8 @@ def get_elasticsearch_connection():
"host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
}]) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
- setup_users_index(es)
+ if for_user:
+ setup_users_index(es)
es_logger = logging.getLogger("elasticsearch")
es_logger.setLevel(logging.INFO)
diff --git a/wqflask/wqflask/correlation/corr_scatter_plot.py b/wqflask/wqflask/correlation/corr_scatter_plot.py
index 94711c67..831baf7e 100644
--- a/wqflask/wqflask/correlation/corr_scatter_plot.py
+++ b/wqflask/wqflask/correlation/corr_scatter_plot.py
@@ -6,44 +6,19 @@ from utility import corr_result_helpers
from scipy import stats
import numpy as np
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
+
class CorrScatterPlot(object):
"""Page that displays a correlation scatterplot with a line fitted to it"""
def __init__(self, params):
self.data_set_1 = data_set.create_dataset(params['dataset_1'])
self.data_set_2 = data_set.create_dataset(params['dataset_2'])
+ #self.data_set_3 = data_set.create_dataset(params['dataset_3'])
self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1)
self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2)
-
- try:
- width = int(params['width'])
- except:
- width = 800
-
- try:
- height = int(params['height'])
- except:
- height = 600
-
- try:
- circle_color = params['circle_color']
- except:
- circle_color = '#3D85C6'
-
- try:
- circle_radius = int(params['circle_radius'])
- except:
- circle_radius = 5
-
- try:
- line_color = params['line_color']
- except:
- line_color = '#FF0000'
-
- try:
- line_width = int(params['line_width'])
- except:
- line_width = 1
+ #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3)
samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)
@@ -60,14 +35,18 @@ class CorrScatterPlot(object):
x = np.array(vals_1)
y = np.array(vals_2)
- slope, intercept, r_value, p_value, _std_err = stats.linregress(x, y)
+ slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
rx = stats.rankdata(x)
ry = stats.rankdata(y)
self.rdata = []
self.rdata.append(rx.tolist())
self.rdata.append(ry.tolist())
- srslope, srintercept, srr_value, srp_value, _srstd_err = stats.linregress(rx, ry)
+ srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry)
+
+ #vals_3 = []
+ #for sample in self.trait_3.data:
+ # vals_3.append(self.trait_3.data[sample].value)
self.js_data = dict(
data = self.data,
@@ -89,13 +68,9 @@ class CorrScatterPlot(object):
srslope = srslope,
srintercept = srintercept,
srr_value = srr_value,
- srp_value = srp_value,
+ srp_value = srp_value
- width = width,
- height = height,
- circle_color = circle_color,
- circle_radius = circle_radius,
- line_color = line_color,
- line_width = line_width
+ #trait3 = self.trait_3.data,
+ #vals_3 = vals_3
)
self.jsdata = self.js_data
diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py
index 6d84a960..78bb3b42 100644
--- a/wqflask/wqflask/show_trait/SampleList.py
+++ b/wqflask/wqflask/show_trait/SampleList.py
@@ -10,8 +10,12 @@ import numpy as np
from scipy import stats
from pprint import pformat as pf
+import simplejson as json
+
import itertools
+from utility.elasticsearch_tools import get_elasticsearch_connection
+
import utility.logger
logger = utility.logger.getLogger(__name__ )
@@ -33,6 +37,8 @@ class SampleList(object):
self.get_attributes()
+ self.sample_qnorm = get_transform_vals(self.dataset, this_trait)
+
if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet':
self.get_extra_attribute_values()
@@ -152,36 +158,47 @@ class SampleList(object):
return any(sample.variance for sample in self.sample_list)
-#def z_score(vals):
-# vals_array = np.array(vals)
-# mean = np.mean(vals_array)
-# stdv = np.std(vals_array)
-#
-# z_scores = []
-# for val in vals_array:
-# z_score = (val - mean)/stdv
-# z_scores.append(z_score)
-#
-#
-#
-# return z_scores
-
-
-#def z_score(row):
-# L = [n for n in row if not np.isnan(n)]
-# m = np.mean(L)
-# s = np.std(L)
-# zL = [1.0 * (n - m) / s for n in L]
-# if len(L) == len(row): return zL
-# # deal with nan
-# retL = list()
-# for n in row:
-# if np.isnan(n):
-# retL.append(nan)
-# else:
-# retL.append(zL.pop(0))
-# assert len(zL) == 0
-# return retL
+def get_transform_vals(dataset, trait):
+ es = get_elasticsearch_connection(for_user=False)
+
+ logger.info("DATASET NAME:", dataset.name)
+
+ query = '{"bool": {"must": [{"match": {"name": "%s"}}, {"match": {"dataset": "%s"}}]}}' % (trait.name, dataset.name)
+
+ es_body = {
+ "query": {
+ "bool": {
+ "must": [
+ {
+ "match": {
+ "name": "%s" % (trait.name)
+ }
+ },
+ {
+ "match": {
+ "dataset": "%s" % (dataset.name)
+ }
+ }
+ ]
+ }
+ }
+ }
+
+ response = es.search( index = "traits", doc_type = "trait", body = es_body )
+ logger.info("THE RESPONSE:", response)
+ results = response['hits']['hits']
+
+ if len(results) > 0:
+ samples = results[0]['_source']['samples']
+
+ sample_dict = {}
+ for sample in samples:
+ sample_dict[sample['name']] = sample['qnorm']
+
+ logger.info("SAMPLE DICT:", sample_dict)
+ return sample_dict
+ else:
+ return None
def natural_sort_key(x):
"""Get expected results when using as a key for sort - ints or strings are sorted properly"""
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 8b801396..d6d83c02 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -26,9 +26,6 @@ from db import webqtlDatabaseFunction
from pprint import pformat as pf
-from utility.tools import flat_files, flat_file_exists
-from utility.tools import get_setting
-
from utility.logger import getLogger
logger = getLogger(__name__ )
@@ -306,7 +303,6 @@ def get_nearest_marker(this_trait, this_db):
#return "", ""
else:
return result[0][0]
- #return result[0][0], result[1][0]
def get_genofiles(this_dataset):
jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, this_dataset.group.name)
diff --git a/wqflask/wqflask/static/new/css/corr_scatter_plot.css b/wqflask/wqflask/static/new/css/corr_scatter_plot.css
index c62d4c9a..a2ebb252 100644
--- a/wqflask/wqflask/static/new/css/corr_scatter_plot.css
+++ b/wqflask/wqflask/static/new/css/corr_scatter_plot.css
@@ -1,13 +1,41 @@
-.chart {
+.nvd3 .nv-axis.nv-x text {
+ font-size: 16px;
+ font-weight: normal;
+ fill: black;
+}
+.nvd3 .nv-axis.nv-y text {
+ font-size: 16px;
+ font-weight: normal;
+ fill: black;
}
-.main text {
- font: 10px sans-serif;
+.nv-x .nv-axis g path.domain {
+ stroke: black;
+ stroke-width: 2;
}
-.axis line, .axis path {
- shape-rendering: crispEdges;
+.nv-y .nv-axis g path.domain {
stroke: black;
- fill: none;
+ stroke-width: 2;
+}
+
+.nvd3 .nv-axis.nv-x path.domain {
+ stroke-opacity: 1;
+}
+
+.nvd3 .nv-axis.nv-y path.domain {
+ stroke-opacity: 1;
+}
+
+line.nv-regLine {
+ stroke: red;
+ stroke-width: 1;
+}
+
+.nv-axisMin-x,
+.nv-axisMax-x,
+.nv-axisMin-y,
+.nv-axisMax-y {
+ display: none;
}
diff --git a/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js b/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js
index cfde6f09..c290cdfe 100644
--- a/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js
+++ b/wqflask/wqflask/static/new/javascript/draw_corr_scatterplot.js
@@ -1,3 +1,5 @@
+// http://gn2-lei.genenetwork.org/corr_scatter_plot2?dataset_1=HC_M2_0606_P&dataset_2=HC_M2_0606_P&dataset_3=HC_M2_0606_P&trait_1=1427571_at&trait_2=1457022_at&trait_3=1427571_at
+
var chart;
var srchart;
@@ -7,7 +9,7 @@ function drawg() {
//
chart.showLegend(false);
chart.duration(300);
- chart.color(d3.scale.category10().range());
+ //chart.color(d3.scale.category10().range());
chart.pointRange([0, 400]);
chart.pointDomain([0, 10]);
//
@@ -74,18 +76,47 @@ function getdata(size, shape) {
slope: js_data.slope,
intercept: js_data.intercept
});
+
+ sizemin = 1;
+ sizemax = 50;
+ if ('vals_3' in js_data) {
+ datamin = d3.min(js_data.vals_3);
+ datamax = d3.max(js_data.vals_3);
+ colormin = $("#cocolorfrom").val();
+ colormax = $("#cocolorto").val();
+ compute = d3.interpolate(colormin, colormax);
+ linear = d3.scale.linear().domain([datamin, datamax]).range([0,1]);
+ }
+
for (j = 0; j < js_data.data[0].length; j++) {
+ if ('trait3' in js_data) {
+ if (js_data.indIDs[j] in js_data.trait3) {
+ datav = js_data.trait3[js_data.indIDs[j]].value;
+ // size = (sizemax - sizemin) * (datav - datamin) / (datamax - datamin) + sizemin;
+ sizev = map1to2(datamin, datamax, sizemin, sizemax, datav);
+ }
+ } else {
+ datav = 0;
+ sizev = sizemin;
+ }
data[0].values.push({
x: js_data.data[0][j],
y: js_data.data[1][j],
name: js_data.indIDs[j],
- size: size,
- shape: shape
+ size: sizev,
+ shape: shape,
+ v3: datav
});
}
+ console.log(data);
return data;
}
+function map1to2 (min1, max1, min2, max2, v1) {
+ v2 = (v1 - min1) * (max2 - min2) / (max1 - min1) + min2;
+ return v2;
+}
+
function srgetdata(size, shape) {
var data = [];
data.push({
@@ -94,6 +125,12 @@ function srgetdata(size, shape) {
intercept: js_data.srintercept
});
for (j = 0; j < js_data.rdata[0].length; j++) {
+ if (js_data.indIDs[j] in js_data.trait3) {
+ size = js_data.trait3[js_data.indIDs[j]].value;
+ //console.log("yes "+js_data.indIDs[j]+", "+size);
+ } else {
+ //console.log("no "+js_data.indIDs[j]);
+ }
data[0].values.push({
x: js_data.rdata[0][j],
y: js_data.rdata[1][j],
@@ -163,13 +200,39 @@ function chartupdatewh() {
window.dispatchEvent(new Event('resize'));
}
+ function colorer(d) {
+ datamin = d3.min(js_data.vals_3);
+ datamax = d3.max(js_data.vals_3);
+ //colormin = d3.rgb(255,0,0);
+ //colormax = d3.rgb(0,255,0);
+ colormin = $("#cocolorfrom").val();
+ colormax = $("#cocolorto").val();
+
+ console.log("colormin: "+colormin);
+ console.log("colormax: "+colormax);
+
+ compute = d3.interpolate(colormin, colormax);
+ linear = d3.scale.linear().domain([datamin, datamax]).range([0,1]);
+ //console.log(d[0].x);
+ c= compute(linear(d[0].x));
+ //console.log(c);
+ return c;
+ }
+
function chartupdatedata() {
//
var size = $("#marksize").val();
var shape = $("#markshape").val();
//
- d3.select('#scatterplot2 svg').datum(nv.log(getdata(size, shape))).call(chart);
+ d3.select('#scatterplot2 svg').datum(getdata(size, shape)).call(chart);
d3.select('#srscatterplot2 svg').datum(nv.log(srgetdata(size, shape))).call(srchart);
+ //
+ d3.selectAll('.nv-point')
+ .attr({
+ 'stroke': colorer,
+ 'fill': colorer
+ });
+ //
nv.utils.windowResize(chart.update);
nv.utils.windowResize(srchart.update);
}
diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js
index dfdafaf0..17afc814 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait.js
@@ -559,6 +559,18 @@
})(this));
};
$('#reset').click(reset_samples_table);
+ switch_qnorm_data = function() {
+ return $('.trait_value_input').each((function(_this) {
+ return function(_index, element) {
+ transform_val = $(element).data('transform')
+ if (transform_val != "") {
+ $(element).val(transform_val.toFixed(3));
+ }
+ return transform_val
+ };
+ })(this));
+ };
+ $('#qnorm').click(switch_qnorm_data);
get_sample_table_data = function(table_name) {
var samples;
samples = [];
@@ -871,6 +883,7 @@
$('#exclude_group').click(edit_data_change);
$('#block_outliers').click(edit_data_change);
$('#reset').click(edit_data_change);
+ $('#qnorm').click(edit_data_change);
return console.log("end");
});
diff --git a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
index 81123de7..daa5b3f2 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait_mapping_tools.js
@@ -213,7 +213,7 @@
var form_data, url;
console.log("RUNNING GEMMA");
url = "/loading";
- $('input[name=method]').val("gemma_bimbam");
+ $('input[name=method]').val("gemma");
$('input[name=num_perm]').val(0);
$('input[name=genofile]').val($('#genofile_gemma').val());
$('input[name=maf]').val($('input[name=maf_gemma]').val());
diff --git a/wqflask/wqflask/templates/corr_scatterplot.html b/wqflask/wqflask/templates/corr_scatterplot.html
index 87588534..e0f017c2 100644
--- a/wqflask/wqflask/templates/corr_scatterplot.html
+++ b/wqflask/wqflask/templates/corr_scatterplot.html
@@ -1,222 +1,254 @@
-{% extends "base.html" %}
-
-{% block css %}
-
-
-
-
-
-
-
-{% endblock %}
-
-{% block content %}
-
-
-
-
Correlation Scatterplot
-
-
-
- {% if trait_1.dataset.type == "ProbeSet" %}
-
-
- [{{trait_1.symbol}} on {{trait_1.location_repr}} Mb]
- {{trait_1.description_display}}
-
- {% elif trait_1.dataset.type == "Publish" %}
-
-
- {% endif %}
-
-
-
- {% if trait_2.dataset.type == "ProbeSet" %}
-
-
- [{{trait_2.symbol}} on {{trait_2.location_repr}} Mb]
- {{trait_2.description_display}}
-
- {% elif trait_2.dataset.type == "Publish" %}
-
-
- {% endif %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Pearson Correlation Scatterplot
-
-
-
-
-
-
-
- Statistic Value
-
-
- Number {{jsdata.num_overlap}}
- Slope {{'%0.3f' % jsdata.slope}}
- Intercept {{'%0.3f' % jsdata.intercept}}
- r value {{'%0.3f' % jsdata.r_value}}
- P value {% if jsdata.p_value < 0.001 %}{{'%0.3e' % jsdata.p_value}}{% else %}{{'%0.3f' % jsdata.p_value}}{% endif %}
-
-
- Regression Line
-
- y = {{'%0.3f' % jsdata.slope}} * x + {{'%0.3f' % jsdata.intercept}}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Spearman Rank Correlation Scatterplot
-
-
-
-
-
-
-
- Statistic Value
-
-
- Number {{jsdata.num_overlap}}
- Slope {{'%0.3f' % jsdata.srslope}}
- Intercept {{'%0.3f' % jsdata.srintercept}}
- r value {{'%0.3f' % jsdata.srr_value}}
- P value {% if jsdata.srp_value < 0.001 %}{{'%0.3e' % jsdata.srp_value}}{% else %}{{'%0.3f' % jsdata.srp_value}}{% endif %}
-
-
-
-
-
-
-
-
-
-{% endblock %}
-
-{% block js %}
-
-
-
-
-
-
-
-
-{% endblock %}
+{% extends "base.html" %}
+
+{% block css %}
+
+
+
+
+
+{% endblock %}
+
+{% block content %}
+
+
+
+
Correlation Scatterplot
+
+
+ {% if trait_1.dataset.type == "ProbeSet" %}
+
+
+ [{{trait_1.symbol}} on {{trait_1.location_repr}} Mb]
+ {{trait_1.description_display}}
+
+ {% elif trait_1.dataset.type == "Publish" %}
+
+
+ {% endif %}
+
+
+
+ {% if trait_2.dataset.type == "ProbeSet" %}
+
+
+ [{{trait_2.symbol}} on {{trait_2.location_repr}} Mb]
+ {{trait_2.description_display}}
+
+ {% elif trait_2.dataset.type == "Publish" %}
+
+
+ {% endif %}
+
+
+
+ {% if trait_3 %}
+ {% if trait_3.dataset.type == "ProbeSet" %}
+
+
+ [{{trait_3.symbol}} on {{trait_3.location_repr}} Mb]
+ {{trait_3.description_display}}
+
+ {% elif trait_3.dataset.type == "Publish" %}
+
+
+ {% endif %}
+ {% endif %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Pearson Correlation Scatterplot
+
+
+
+
+
+
+
+ Statistic Value
+
+
+
+ Number
+ {{jsdata.num_overlap}}
+
+
+ Slope
+ {{'%0.3f' % jsdata.slope}}
+
+
+ Intercept
+ {{'%0.3f' % jsdata.intercept}}
+
+
+ r value
+ {{'%0.3f' % jsdata.r_value}}
+
+
+ P value
+ {% if jsdata.p_value < 0.001 %}{{'%0.3e' % jsdata.p_value}}{% else %}{{'%0.3f' % jsdata.p_value}}{% endif %}
+
+
+
+ Regression Line
+
+ y = {{'%0.3f' % jsdata.slope}} * x + {{'%0.3f' % jsdata.intercept}}
+
+
+
+
+
+
+
+
+
+
+
+
+
Spearman Rank Correlation Scatterplot
+
+
+
+
+
+
+
+
+
+ Statistic Value
+
+
+
+ Number
+ {{jsdata.num_overlap}}
+
+
+ Slope
+ {{'%0.3f' % jsdata.srslope}}
+
+
+ Intercept
+ {{'%0.3f' % jsdata.srintercept}}
+
+
+ r value
+ {{'%0.3f' % jsdata.srr_value}}
+
+
+ P value
+ {% if jsdata.srp_value < 0.001 %}{{'%0.3e' % jsdata.srp_value}}{% else %}{{'%0.3f' % jsdata.srp_value}}{% endif %}
+
+
+
+
+
+
+
+
+
+{% endblock %}
+
+{% block js %}
+
+
+
+
+
+
+
+
+{% endblock %}
diff --git a/wqflask/wqflask/templates/show_trait_edit_data.html b/wqflask/wqflask/templates/show_trait_edit_data.html
index a431821e..1402db47 100644
--- a/wqflask/wqflask/templates/show_trait_edit_data.html
+++ b/wqflask/wqflask/templates/show_trait_edit_data.html
@@ -54,6 +54,10 @@
CSV
+
+ {% if sample_groups[0].sample_qnorm is not none %}
+
+ {% endif %}
@@ -111,7 +115,7 @@
{# Todo: Add IDs #}
- previous_chr:
previous_chr = marker['chr']
elif marker['chr'] < previous_chr:
diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py
index 78bb3b42..8dbba530 100644
--- a/wqflask/wqflask/show_trait/SampleList.py
+++ b/wqflask/wqflask/show_trait/SampleList.py
@@ -37,7 +37,7 @@ class SampleList(object):
self.get_attributes()
- self.sample_qnorm = get_transform_vals(self.dataset, this_trait)
+ #self.sample_qnorm = get_transform_vals(self.dataset, this_trait)
if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet':
self.get_extra_attribute_values()
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index d6d83c02..7b952af4 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -12,6 +12,8 @@ from collections import OrderedDict
import redis
Redis = redis.StrictRedis()
+import scipy.stats as ss
+
from flask import Flask, g
from htmlgen import HTMLgen2 as HT
@@ -137,6 +139,8 @@ class ShowTrait(object):
self.make_sample_lists()
+ self.qnorm_vals = quantile_normalize_vals(self.sample_groups)
+
# Todo: Add back in the ones we actually need from below, as we discover we need them
hddn = OrderedDict()
@@ -281,6 +285,44 @@ class ShowTrait(object):
self.sample_groups = (primary_samples,)
self.dataset.group.allsamples = all_samples_ordered
def quantile_normalize_vals(sample_groups):
    """Quantile-normalize the sample values of each sample group.

    For every group in *sample_groups*, the numeric sample values are mapped
    onto standard-normal quantiles by rank, formatted to three decimals as
    strings. Missing samples (display_value == "x") are passed through as the
    literal string "x" so the returned list lines up 1:1 with
    ``sample_type.sample_list``.

    :param sample_groups: iterable of sample groups; each group exposes a
        ``sample_list`` of samples with ``value`` and ``display_value``.
    :return: list (one entry per group) of lists of "%0.3f"-formatted strings
        (or "x" placeholders).
    """
    def normf(trait_vals):
        # 1-based ranks; scipy.stats.rankdata averages ties (e.g. 2.5).
        ranked_vals = ss.rankdata(trait_vals)
        n = len(trait_vals)
        # Plotting positions (k - 0.5)/n for k = 1..n, then their
        # standard-normal quantiles, in ascending order.
        z = ss.norm.ppf([(k + 0.5) / n for k in range(n)])
        # Map each value's rank to the matching quantile.
        # NOTE(review): int() truncates fractional (tied) ranks downward;
        # averaging neighbouring quantiles would be more precise — kept
        # as-is to preserve existing output. TODO confirm ties are rare.
        return ["%0.3f" % z[int(rank) - 1] for rank in ranked_vals]

    qnorm_by_group = []
    for sample_type in sample_groups:
        trait_vals = []
        for sample in sample_type.sample_list:
            try:
                trait_vals.append(float(sample.value))
            except (TypeError, ValueError):
                # Non-numeric / missing values are excluded from
                # normalization and re-inserted as "x" below.
                continue

        qnorm_vals = normf(trait_vals)

        # Re-insert "x" placeholders at the positions of missing samples.
        # Assumes samples failing float() above are exactly those whose
        # display_value is "x" — TODO confirm against SampleList.
        qnorm_vals_with_x = []
        counter = 0
        for sample in sample_type.sample_list:
            if sample.display_value == "x":
                qnorm_vals_with_x.append("x")
            else:
                qnorm_vals_with_x.append(qnorm_vals[counter])
                counter += 1

        qnorm_by_group.append(qnorm_vals_with_x)

    return qnorm_by_group
+
def get_nearest_marker(this_trait, this_db):
this_chr = this_trait.locus_chr
logger.debug("this_chr:", this_chr)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js
index 17afc814..5e2ecc33 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait.js
@@ -549,6 +549,7 @@
};
$('#block_outliers').click(block_outliers);
reset_samples_table = function() {
+ $('input[name="transform"]').val("");
return $('.trait_value_input').each((function(_this) {
return function(_index, element) {
console.log("value is:", $(element).val());
@@ -559,6 +560,52 @@
})(this));
};
$('#reset').click(reset_samples_table);
+
+  // Replace each editable trait value with its log2 transform (3 d.p.).
+  // Non-numeric entries (e.g. "x" placeholders) are left unchanged.
+  log_normalize_data = function() {
+    return $('.trait_value_input').each((function(_this) {
+      return function(_index, element) {
+        // "value" data attribute holds the original, untransformed value.
+        current_value = $(element).data("value");
+        if(isNaN(current_value)) {
+          return current_value
+        } else {
+          $(element).val(Math.log2(current_value).toFixed(3));
+          return Math.log2(current_value).toFixed(3)
+        }
+      };
+    })(this));
+  };
+
+  // Replace each editable trait value with its server-computed quantile
+  // normalization, read from the element's "qnorm" data attribute.
+  qnorm_data = function() {
+    return $('.trait_value_input').each((function(_this) {
+      return function(_index, element) {
+        current_value = $(element).data("value");
+        if(isNaN(current_value)) {
+          return current_value
+        } else {
+          $(element).val($(element).data("qnorm"));
+          return $(element).data("qnorm");
+        }
+      };
+    })(this));
+  };
+
+  // Apply the transform selected in #norm_method. The hidden
+  // input[name="transform"] records the currently applied transform so the
+  // same transform is not applied twice in a row.
+  normalize_data = function() {
+    if ($('#norm_method option:selected').val() == 'log2'){
+      if ($('input[name="transform"]').val() != "log2") {
+        log_normalize_data()
+        $('input[name="transform"]').val("log2")
+      }
+    }
+    else if ($('#norm_method option:selected').val() == 'qnorm'){
+      if ($('input[name="transform"]').val() != "qnorm") {
+        qnorm_data()
+        $('input[name="transform"]').val("qnorm")
+      }
+    }
+  }
+
+  $('#normalize').click(normalize_data);
+
switch_qnorm_data = function() {
return $('.trait_value_input').each((function(_this) {
return function(_index, element) {
@@ -734,7 +781,7 @@
box_data = [trace1, trace2, trace3]
} else {
var box_layout = {
- width: 500,
+ width: 300,
height: 500,
margin: {
l: 50,
@@ -834,7 +881,7 @@
var layout = {
yaxis: {
- range: [range_bottom, range_top]
+ range: [range_bottom, range_top],
},
width: 1200,
height: 500,
@@ -884,6 +931,7 @@
$('#block_outliers').click(edit_data_change);
$('#reset').click(edit_data_change);
$('#qnorm').click(edit_data_change);
+ $('#normalize').click(edit_data_change);
return console.log("end");
});
diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html
index 2a5556ea..dba3e266 100755
--- a/wqflask/wqflask/templates/index_page_orig.html
+++ b/wqflask/wqflask/templates/index_page_orig.html
@@ -34,11 +34,11 @@