diff options
-rwxr-xr-x | bin/genenetwork2 | 7 | ||||
-rw-r--r-- | doc/elasticsearch.org | 247 | ||||
-rw-r--r-- | test/requests/parametrized_test.py | 32 | ||||
-rwxr-xr-x | test/requests/test-website.py | 1 | ||||
-rw-r--r-- | test/requests/test_forgot_password.py | 29 | ||||
-rw-r--r-- | test/requests/test_registration.py | 36 | ||||
-rw-r--r-- | wqflask/maintenance/quantile_normalize.py | 18 | ||||
-rw-r--r-- | wqflask/utility/elasticsearch_tools.py | 121 | ||||
-rw-r--r-- | wqflask/utility/tools.py | 5 | ||||
-rw-r--r-- | wqflask/wqflask/user_session.py | 1 |
10 files changed, 23 insertions, 474 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2 index 2b94b2a2..5f714d2e 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -101,13 +101,6 @@ fi export GN2_SETTINGS=$settings # Python echo GN2_SETTINGS=$settings -# This is a temporary hack to inject ES - should have added python2-elasticsearch package to guix instead -# if [ -z $ELASTICSEARCH_PROFILE ]; then -# echo -e "WARNING: Elastic Search profile has not been set - use ELASTICSEARCH_PROFILE"; -# else -# PYTHONPATH="$PYTHONPATH${PYTHONPATH:+:}$ELASTICSEARCH_PROFILE/lib/python3.8/site-packages" -# fi - if [ -z $GN2_PROFILE ] ; then echo "WARNING: GN2_PROFILE has not been set - you need the environment, so I hope you know what you are doing!" export GN2_PROFILE=$(dirname $(dirname $(which genenetwork2))) diff --git a/doc/elasticsearch.org b/doc/elasticsearch.org deleted file mode 100644 index 864a8363..00000000 --- a/doc/elasticsearch.org +++ /dev/null @@ -1,247 +0,0 @@ -* Elasticsearch - -** Introduction - -GeneNetwork uses elasticsearch (ES) for all things considered -'state'. One example is user collections, another is user management. - -** Example - -To get the right environment, first you can get a python REPL with something like - -: env GN2_PROFILE=~/opt/gn-latest ./bin/genenetwork2 ../etc/default_settings.py -cli python - -(make sure to use the correct GN2_PROFILE!) - -Next try - -#+BEGIN_SRC python - -from elasticsearch import Elasticsearch, TransportError - -es = Elasticsearch([{ "host": 'localhost', "port": '9200' }]) - -# Dump all data - -es.search("*") - -# To fetch an E-mail record from the users index - -record = es.search( - index = 'users', doc_type = 'local', body = { - "query": { "match": { "email_address": "myname@email.com" } } - }) - -# It is also possible to do wild card matching - -q = { "query": { "wildcard" : { "full_name" : "pjot*" } }} -es.search(index = 'users', doc_type = 'local', body = q) - -# To get elements from that record: - -record['hits']['hits'][0][u'_source']['full_name'] -u'Pjotr' - -record['hits']['hits'][0][u'_source']['email_address'] -u"myname@email.com" - -#+END_SRC - -** Health - -ES provides support for checking its health: - -: curl -XGET http://localhost:9200/_cluster/health?pretty=true - -#+BEGIN_SRC json - - - { - "cluster_name" : "asgard", - "status" : "yellow", - "timed_out" : false, - "number_of_nodes" : 1, - "number_of_data_nodes" : 1, - "active_primary_shards" : 5, - "active_shards" : 5, - "relocating_shards" : 0, - "initializing_shards" : 0, - "unassigned_shards" : 5 - } - -#+END_SRC - -Yellow means just one instance is running (no worries). - -To get full cluster info - -: curl -XGET "localhost:9200/_cluster/stats?human&pretty" - -#+BEGIN_SRC json -{ - "_nodes" : { - "total" : 1, - "successful" : 1, - "failed" : 0 - }, - "cluster_name" : "elasticsearch", - "timestamp" : 1529050366452, - "status" : "yellow", - "indices" : { - "count" : 3, - "shards" : { - "total" : 15, - "primaries" : 15, - "replication" : 0.0, - "index" : { - "shards" : { - "min" : 5, - "max" : 5, - "avg" : 5.0 - }, - "primaries" : { - "min" : 5, - "max" : 5, - "avg" : 5.0 - }, - "replication" : { - "min" : 0.0, - "max" : 0.0, - "avg" : 0.0 - } - } - }, - "docs" : { - "count" : 14579, - "deleted" : 0 - }, - "store" : { - "size" : "44.7mb", - "size_in_bytes" : 46892794 - }, - "fielddata" : { - "memory_size" : "0b", - "memory_size_in_bytes" : 0, - "evictions" : 0 - }, - "query_cache" : { - "memory_size" : "0b", - "memory_size_in_bytes" : 0, - "total_count" : 0, - "hit_count" : 0, - "miss_count" : 0, - "cache_size" : 0, - "cache_count" : 0, - "evictions" : 0 - }, - "completion" : { - "size" : "0b", - "size_in_bytes" : 0 - }, - "segments" : { - "count" : 24, - "memory" : "157.3kb", - "memory_in_bytes" : 161112, - "terms_memory" : "122.6kb", - "terms_memory_in_bytes" : 125569, - "stored_fields_memory" : "15.3kb", - "stored_fields_memory_in_bytes" : 15728, - "term_vectors_memory" : "0b", - "term_vectors_memory_in_bytes" : 0, - "norms_memory" : "10.8kb", - "norms_memory_in_bytes" : 11136, - "points_memory" : "111b", - "points_memory_in_bytes" : 111, - "doc_values_memory" : "8.3kb", - "doc_values_memory_in_bytes" : 8568, - "index_writer_memory" : "0b", - "index_writer_memory_in_bytes" : 0, - "version_map_memory" : "0b", - "version_map_memory_in_bytes" : 0, - "fixed_bit_set" : "0b", - "fixed_bit_set_memory_in_bytes" : 0, - "max_unsafe_auto_id_timestamp" : -1, - "file_sizes" : { } - } - }, - "nodes" : { - "count" : { - "total" : 1, - "data" : 1, - "coordinating_only" : 0, - "master" : 1, - "ingest" : 1 - }, - "versions" : [ - "6.2.1" - ], - "os" : { - "available_processors" : 16, - "allocated_processors" : 16, - "names" : [ - { - "name" : "Linux", - "count" : 1 - } - ], - "mem" : { - "total" : "125.9gb", - "total_in_bytes" : 135189286912, - "free" : "48.3gb", - "free_in_bytes" : 51922628608, - "used" : "77.5gb", - "used_in_bytes" : 83266658304, - "free_percent" : 38, - "used_percent" : 62 - } - }, - "process" : { - "cpu" : { - "percent" : 0 - }, - "open_file_descriptors" : { - "min" : 415, - "max" : 415, - "avg" : 415 - } - }, - "jvm" : { - "max_uptime" : "1.9d", - "max_uptime_in_millis" : 165800616, - "versions" : [ - { - "version" : "9.0.4", - "vm_name" : "OpenJDK 64-Bit Server VM", - "vm_version" : "9.0.4+11", - "vm_vendor" : "Oracle Corporation", - "count" : 1 - } - ], - "mem" : { - "heap_used" : "1.1gb", - "heap_used_in_bytes" : 1214872032, - "heap_max" : "23.8gb", - "heap_max_in_bytes" : 25656426496 - }, - "threads" : 110 - }, - "fs" : { - "total" : "786.4gb", - "total_in_bytes" : 844400918528, - "free" : "246.5gb", - "free_in_bytes" : 264688160768, - "available" : "206.5gb", - "available_in_bytes" : 221771468800 - }, - "plugins" : [ ], - "network_types" : { - "transport_types" : { - "netty4" : 1 - }, - "http_types" : { - "netty4" : 1 - } - } - } -} -#+BEGIN_SRC json diff --git a/test/requests/parametrized_test.py b/test/requests/parametrized_test.py deleted file mode 100644 index 50003850..00000000 --- a/test/requests/parametrized_test.py +++ /dev/null @@ -1,32 +0,0 @@ -import logging -import unittest -from wqflask import app -from utility.elasticsearch_tools import get_elasticsearch_connection, get_user_by_unique_column -from elasticsearch import Elasticsearch, TransportError - -class ParametrizedTest(unittest.TestCase): - - def __init__(self, methodName='runTest', gn2_url="http://localhost:5003", es_url="localhost:9200"): - super(ParametrizedTest, self).__init__(methodName=methodName) - self.gn2_url = gn2_url - self.es_url = es_url - - def setUp(self): - self.es = get_elasticsearch_connection() - self.es_cleanup = [] - - es_logger = logging.getLogger("elasticsearch") - es_logger.setLevel(app.config.get("LOG_LEVEL")) - es_logger.addHandler( - logging.FileHandler("/tmp/es_TestRegistrationInfo.log")) - es_trace_logger = logging.getLogger("elasticsearch.trace") - es_trace_logger.addHandler( - logging.FileHandler("/tmp/es_TestRegistrationTrace.log")) - - def tearDown(self): - from time import sleep - self.es.delete_by_query( - index="users" - , doc_type="local" - , body={"query":{"match":{"email_address":"test@user.com"}}}) - sleep(1) diff --git a/test/requests/test-website.py b/test/requests/test-website.py index 8bfb47c2..d619a7d5 100755 --- a/test/requests/test-website.py +++ b/test/requests/test-website.py @@ -43,7 +43,6 @@ def dummy(args_obj, parser): def integration_tests(args_obj, parser): gn2_url = args_obj.host - es_url = app.config.get("ELASTICSEARCH_HOST")+":"+str(app.config.get("ELASTICSEARCH_PORT")) run_integration_tests(gn2_url, es_url) def initTest(klass, gn2_url, es_url): diff --git a/test/requests/test_forgot_password.py b/test/requests/test_forgot_password.py index 346524bc..65b061f8 100644 --- a/test/requests/test_forgot_password.py +++ b/test/requests/test_forgot_password.py @@ -1,25 +1,22 @@ import requests -from utility.elasticsearch_tools import get_user_by_unique_column from parameterized import parameterized from parametrized_test import ParametrizedTest passwork_reset_link = '' forgot_password_page = None -class TestForgotPassword(ParametrizedTest): +class TestForgotPassword(ParametrizedTest): def setUp(self): super(TestForgotPassword, self).setUp() self.forgot_password_url = self.gn2_url+"/n/forgot_password_submit" + def send_email(to_addr, msg, fromaddr="no-reply@genenetwork.org"): print("CALLING: send_email_mock()") email_data = { - "to_addr": to_addr - , "msg": msg - , "fromaddr": from_addr} + "to_addr": to_addr, "msg": msg, "fromaddr": from_addr} data = { - "es_connection": self.es, "email_address": "test@user.com", "full_name": "Test User", "organization": "Test Organisation", @@ -27,24 +24,12 @@ class TestForgotPassword(ParametrizedTest): "password_confirm": "test_password" } - def testWithoutEmail(self): data = {"email_address": ""} - error_notification = '<div class="alert alert-danger">You MUST provide an email</div>' + error_notification = ('<div class="alert alert-danger">' + 'You MUST provide an email</div>') result = requests.post(self.forgot_password_url, data=data) self.assertEqual(result.url, self.gn2_url+"/n/forgot_password") self.assertTrue( - result.content.find(error_notification) >= 0 - , "Error message should be displayed but was not") - - def testWithNonExistingEmail(self): - # Monkey patching doesn't work, so simply test that getting by email - # returns the correct data - user = get_user_by_unique_column(self.es, "email_address", "non-existent@domain.com") - self.assertTrue(user is None, "Should not find non-existent user") - - def testWithExistingEmail(self): - # Monkey patching doesn't work, so simply test that getting by email - # returns the correct data - user = get_user_by_unique_column(self.es, "email_address", "test@user.com") - self.assertTrue(user is not None, "Should find user") + result.content.find(error_notification) >= 0, + "Error message should be displayed but was not") diff --git a/test/requests/test_registration.py b/test/requests/test_registration.py index 0047e8a6..5d08bf58 100644 --- a/test/requests/test_registration.py +++ b/test/requests/test_registration.py @@ -1,31 +1,25 @@ import sys import requests -from parametrized_test import ParametrizedTest class TestRegistration(ParametrizedTest): - def tearDown(self): - for item in self.es_cleanup: - self.es.delete(index="users", doc_type="local", id=item["_id"]) def testRegistrationPage(self): - if self.es.ping(): - data = { - "email_address": "test@user.com", - "full_name": "Test User", - "organization": "Test Organisation", - "password": "test_password", - "password_confirm": "test_password" - } - requests.post(self.gn2_url+"/n/register", data) - response = self.es.search( - index="users" - , doc_type="local" - , body={ - "query": {"match": {"email_address": "test@user.com"}}}) - self.assertEqual(len(response["hits"]["hits"]), 1) - else: - self.skipTest("The elasticsearch server is down") + data = { + "email_address": "test@user.com", + "full_name": "Test User", + "organization": "Test Organisation", + "password": "test_password", + "password_confirm": "test_password" + } + requests.post(self.gn2_url+"/n/register", data) + response = self.es.search( + index="users" + , doc_type="local" + , body={ + "query": {"match": {"email_address": "test@user.com"}}}) + self.assertEqual(len(response["hits"]["hits"]), 1) + def main(gn2, es): import unittest diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 0cc963e5..32780ca6 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -5,14 +5,10 @@ import urllib.parse import numpy as np import pandas as pd -from elasticsearch import Elasticsearch, TransportError -from elasticsearch.helpers import bulk from flask import Flask, g, request from wqflask import app -from utility.elasticsearch_tools import get_elasticsearch_connection -from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI def parse_db_uri(): @@ -106,20 +102,6 @@ if __name__ == '__main__': Conn = MySQLdb.Connect(**parse_db_uri()) Cursor = Conn.cursor() - # es = Elasticsearch([{ - # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT - # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None - - es = get_elasticsearch_connection(for_user=False) - - #input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt" - #input_df = create_dataframe(input_filename) - #output_df = quantileNormalize(input_df) - - #output_df.to_csv('quant_norm.csv', sep='\t') - - #out_filename = sys.argv[1][:-4] + '_quantnorm.txt' - success, _ = bulk(es, set_data(sys.argv[1])) response = es.search( diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py deleted file mode 100644 index eae3ba03..00000000 --- a/wqflask/utility/elasticsearch_tools.py +++ /dev/null @@ -1,121 +0,0 @@ -# Elasticsearch support -# -# Some helpful commands to view the database: -# -# You can test the server being up with -# -# curl -H 'Content-Type: application/json' http://localhost:9200 -# -# List all indices -# -# curl -H 'Content-Type: application/json' 'localhost:9200/_cat/indices?v' -# -# To see the users index 'table' -# -# curl http://localhost:9200/users -# -# To list all user ids -# -# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d ' -# { -# "query" : { -# "match_all" : {} -# }, -# "stored_fields": [] -# }' -# -# To view a record -# -# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d ' -# { -# "query" : { -# "match" : { "email_address": "pjotr2017@thebird.nl"} -# } -# }' -# -# -# To delete the users index and data (dangerous!) -# -# curl -XDELETE -H 'Content-Type: application/json' 'localhost:9200/users' - - -from elasticsearch import Elasticsearch, TransportError -import logging - -from utility.logger import getLogger -logger = getLogger(__name__) - -from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT - - -def test_elasticsearch_connection(): - es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + \ - ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True) - if not es.ping(): - logger.warning("Elasticsearch is DOWN") - - -def get_elasticsearch_connection(for_user=True): - """Return a connection to ES. Returns None on failure""" - logger.info("get_elasticsearch_connection") - es = None - try: - assert(ELASTICSEARCH_HOST) - assert(ELASTICSEARCH_PORT) - logger.info("ES HOST", ELASTICSEARCH_HOST) - - es = Elasticsearch([{ - "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT - }], timeout=30, retry_on_timeout=True) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None - - if for_user: - setup_users_index(es) - - es_logger = logging.getLogger("elasticsearch") - es_logger.setLevel(logging.INFO) - es_logger.addHandler(logging.NullHandler()) - except Exception as e: - logger.error("Failed to get elasticsearch connection", e) - es = None - - return es - - -def setup_users_index(es_connection): - if es_connection: - index_settings = { - "properties": { - "email_address": { - "type": "keyword"}}} - - es_connection.indices.create(index='users', ignore=400) - es_connection.indices.put_mapping( - body=index_settings, index="users", doc_type="local") - - -def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"): - return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type) - - -def save_user(es, user, user_id): - es_save_data(es, "users", "local", user, user_id) - - -def get_item_by_unique_column(es, column_name, column_value, index, doc_type): - item_details = None - try: - response = es.search( - index=index, doc_type=doc_type, body={ - "query": {"match": {column_name: column_value}} - }) - if len(response["hits"]["hits"]) > 0: - item_details = response["hits"]["hits"][0]["_source"] - except TransportError as te: - pass - return item_details - - -def es_save_data(es, index, doc_type, data_item, data_id,): - from time import sleep - es.create(index, doc_type, body=data_item, id=data_id) - sleep(1) # Delay 1 second to allow indexing diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 0efe8ca9..f28961ec 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -287,6 +287,7 @@ JS_GN_PATH = get_setting('JS_GN_PATH') GITHUB_CLIENT_ID = get_setting('GITHUB_CLIENT_ID') GITHUB_CLIENT_SECRET = get_setting('GITHUB_CLIENT_SECRET') +GITHUB_AUTH_URL = "" if GITHUB_CLIENT_ID != 'UNKNOWN' and GITHUB_CLIENT_SECRET: GITHUB_AUTH_URL = "https://github.com/login/oauth/authorize?client_id=" + \ GITHUB_CLIENT_ID + "&client_secret=" + GITHUB_CLIENT_SECRET @@ -301,10 +302,6 @@ if ORCID_CLIENT_ID != 'UNKNOWN' and ORCID_CLIENT_SECRET: "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2" ORCID_TOKEN_URL = get_setting('ORCID_TOKEN_URL') -ELASTICSEARCH_HOST = get_setting('ELASTICSEARCH_HOST') -ELASTICSEARCH_PORT = get_setting('ELASTICSEARCH_PORT') -# import utility.elasticsearch_tools as es -# es.test_elasticsearch_connection() SMTP_CONNECT = get_setting('SMTP_CONNECT') SMTP_USERNAME = get_setting('SMTP_USERNAME') diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py index 67e2e158..d3c4a62f 100644 --- a/wqflask/wqflask/user_session.py +++ b/wqflask/wqflask/user_session.py @@ -10,7 +10,6 @@ from flask import (Flask, g, render_template, url_for, request, make_response, from wqflask import app from utility import hmac -#from utility.elasticsearch_tools import get_elasticsearch_connection from utility.redis_tools import get_redis_conn, get_user_id, get_user_by_unique_column, set_user_attribute, get_user_collections, save_collections Redis = get_redis_conn() |