about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author BonfaceKilz 2021-10-28 11:05:19 +0300
committer BonfaceKilz 2021-10-28 11:10:21 +0300
commit 03caa57ad209f3bdd135be9d6516b94261c9b8de (patch)
tree 17ddfbdd37fe7fa7b3007dbaf2805813f136be2b
parent 230184bf1484ed672bf66c29110bcb47e556f72f (diff)
download genenetwork2-03caa57ad209f3bdd135be9d6516b94261c9b8de.tar.gz
Remove all elasticsearch references in gn2
-rwxr-xr-x bin/genenetwork2 7
-rw-r--r-- doc/elasticsearch.org 247
-rw-r--r-- test/requests/parametrized_test.py 32
-rwxr-xr-x test/requests/test-website.py 1
-rw-r--r-- test/requests/test_forgot_password.py 29
-rw-r--r-- test/requests/test_registration.py 36
-rw-r--r-- wqflask/maintenance/quantile_normalize.py 18
-rw-r--r-- wqflask/utility/elasticsearch_tools.py 121
-rw-r--r-- wqflask/utility/tools.py 5
-rw-r--r-- wqflask/wqflask/user_session.py 1
10 files changed, 23 insertions, 474 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 2b94b2a2..5f714d2e 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -101,13 +101,6 @@ fi
export GN2_SETTINGS=$settings # Python
echo GN2_SETTINGS=$settings
-# This is a temporary hack to inject ES - should have added python2-elasticsearch package to guix instead
-# if [ -z $ELASTICSEARCH_PROFILE ]; then
-# echo -e "WARNING: Elastic Search profile has not been set - use ELASTICSEARCH_PROFILE";
-# else
-# PYTHONPATH="$PYTHONPATH${PYTHONPATH:+:}$ELASTICSEARCH_PROFILE/lib/python3.8/site-packages"
-# fi
-
if [ -z $GN2_PROFILE ] ; then
echo "WARNING: GN2_PROFILE has not been set - you need the environment, so I hope you know what you are doing!"
export GN2_PROFILE=$(dirname $(dirname $(which genenetwork2)))
diff --git a/doc/elasticsearch.org b/doc/elasticsearch.org
deleted file mode 100644
index 864a8363..00000000
--- a/doc/elasticsearch.org
+++ /dev/null
@@ -1,247 +0,0 @@
-* Elasticsearch
-
-** Introduction
-
-GeneNetwork uses elasticsearch (ES) for all things considered
-'state'. One example is user collections, another is user management.
-
-** Example
-
-To get the right environment, first you can get a python REPL with something like
-
-: env GN2_PROFILE=~/opt/gn-latest ./bin/genenetwork2 ../etc/default_settings.py -cli python
-
-(make sure to use the correct GN2_PROFILE!)
-
-Next try
-
-#+BEGIN_SRC python
-
-from elasticsearch import Elasticsearch, TransportError
-
-es = Elasticsearch([{ "host": 'localhost', "port": '9200' }])
-
-# Dump all data
-
-es.search("*")
-
-# To fetch an E-mail record from the users index
-
-record = es.search(
- index = 'users', doc_type = 'local', body = {
- "query": { "match": { "email_address": "myname@email.com" } }
- })
-
-# It is also possible to do wild card matching
-
-q = { "query": { "wildcard" : { "full_name" : "pjot*" } }}
-es.search(index = 'users', doc_type = 'local', body = q)
-
-# To get elements from that record:
-
-record['hits']['hits'][0][u'_source']['full_name']
-u'Pjotr'
-
-record['hits']['hits'][0][u'_source']['email_address']
-u"myname@email.com"
-
-#+END_SRC
-
-** Health
-
-ES provides support for checking its health:
-
-: curl -XGET http://localhost:9200/_cluster/health?pretty=true
-
-#+BEGIN_SRC json
-
-
- {
- "cluster_name" : "asgard",
- "status" : "yellow",
- "timed_out" : false,
- "number_of_nodes" : 1,
- "number_of_data_nodes" : 1,
- "active_primary_shards" : 5,
- "active_shards" : 5,
- "relocating_shards" : 0,
- "initializing_shards" : 0,
- "unassigned_shards" : 5
- }
-
-#+END_SRC
-
-Yellow means just one instance is running (no worries).
-
-To get full cluster info
-
-: curl -XGET "localhost:9200/_cluster/stats?human&pretty"
-
-#+BEGIN_SRC json
-{
- "_nodes" : {
- "total" : 1,
- "successful" : 1,
- "failed" : 0
- },
- "cluster_name" : "elasticsearch",
- "timestamp" : 1529050366452,
- "status" : "yellow",
- "indices" : {
- "count" : 3,
- "shards" : {
- "total" : 15,
- "primaries" : 15,
- "replication" : 0.0,
- "index" : {
- "shards" : {
- "min" : 5,
- "max" : 5,
- "avg" : 5.0
- },
- "primaries" : {
- "min" : 5,
- "max" : 5,
- "avg" : 5.0
- },
- "replication" : {
- "min" : 0.0,
- "max" : 0.0,
- "avg" : 0.0
- }
- }
- },
- "docs" : {
- "count" : 14579,
- "deleted" : 0
- },
- "store" : {
- "size" : "44.7mb",
- "size_in_bytes" : 46892794
- },
- "fielddata" : {
- "memory_size" : "0b",
- "memory_size_in_bytes" : 0,
- "evictions" : 0
- },
- "query_cache" : {
- "memory_size" : "0b",
- "memory_size_in_bytes" : 0,
- "total_count" : 0,
- "hit_count" : 0,
- "miss_count" : 0,
- "cache_size" : 0,
- "cache_count" : 0,
- "evictions" : 0
- },
- "completion" : {
- "size" : "0b",
- "size_in_bytes" : 0
- },
- "segments" : {
- "count" : 24,
- "memory" : "157.3kb",
- "memory_in_bytes" : 161112,
- "terms_memory" : "122.6kb",
- "terms_memory_in_bytes" : 125569,
- "stored_fields_memory" : "15.3kb",
- "stored_fields_memory_in_bytes" : 15728,
- "term_vectors_memory" : "0b",
- "term_vectors_memory_in_bytes" : 0,
- "norms_memory" : "10.8kb",
- "norms_memory_in_bytes" : 11136,
- "points_memory" : "111b",
- "points_memory_in_bytes" : 111,
- "doc_values_memory" : "8.3kb",
- "doc_values_memory_in_bytes" : 8568,
- "index_writer_memory" : "0b",
- "index_writer_memory_in_bytes" : 0,
- "version_map_memory" : "0b",
- "version_map_memory_in_bytes" : 0,
- "fixed_bit_set" : "0b",
- "fixed_bit_set_memory_in_bytes" : 0,
- "max_unsafe_auto_id_timestamp" : -1,
- "file_sizes" : { }
- }
- },
- "nodes" : {
- "count" : {
- "total" : 1,
- "data" : 1,
- "coordinating_only" : 0,
- "master" : 1,
- "ingest" : 1
- },
- "versions" : [
- "6.2.1"
- ],
- "os" : {
- "available_processors" : 16,
- "allocated_processors" : 16,
- "names" : [
- {
- "name" : "Linux",
- "count" : 1
- }
- ],
- "mem" : {
- "total" : "125.9gb",
- "total_in_bytes" : 135189286912,
- "free" : "48.3gb",
- "free_in_bytes" : 51922628608,
- "used" : "77.5gb",
- "used_in_bytes" : 83266658304,
- "free_percent" : 38,
- "used_percent" : 62
- }
- },
- "process" : {
- "cpu" : {
- "percent" : 0
- },
- "open_file_descriptors" : {
- "min" : 415,
- "max" : 415,
- "avg" : 415
- }
- },
- "jvm" : {
- "max_uptime" : "1.9d",
- "max_uptime_in_millis" : 165800616,
- "versions" : [
- {
- "version" : "9.0.4",
- "vm_name" : "OpenJDK 64-Bit Server VM",
- "vm_version" : "9.0.4+11",
- "vm_vendor" : "Oracle Corporation",
- "count" : 1
- }
- ],
- "mem" : {
- "heap_used" : "1.1gb",
- "heap_used_in_bytes" : 1214872032,
- "heap_max" : "23.8gb",
- "heap_max_in_bytes" : 25656426496
- },
- "threads" : 110
- },
- "fs" : {
- "total" : "786.4gb",
- "total_in_bytes" : 844400918528,
- "free" : "246.5gb",
- "free_in_bytes" : 264688160768,
- "available" : "206.5gb",
- "available_in_bytes" : 221771468800
- },
- "plugins" : [ ],
- "network_types" : {
- "transport_types" : {
- "netty4" : 1
- },
- "http_types" : {
- "netty4" : 1
- }
- }
- }
-}
-#+BEGIN_SRC json
diff --git a/test/requests/parametrized_test.py b/test/requests/parametrized_test.py
deleted file mode 100644
index 50003850..00000000
--- a/test/requests/parametrized_test.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import logging
-import unittest
-from wqflask import app
-from utility.elasticsearch_tools import get_elasticsearch_connection, get_user_by_unique_column
-from elasticsearch import Elasticsearch, TransportError
-
-class ParametrizedTest(unittest.TestCase):
-
- def __init__(self, methodName='runTest', gn2_url="http://localhost:5003", es_url="localhost:9200"):
- super(ParametrizedTest, self).__init__(methodName=methodName)
- self.gn2_url = gn2_url
- self.es_url = es_url
-
- def setUp(self):
- self.es = get_elasticsearch_connection()
- self.es_cleanup = []
-
- es_logger = logging.getLogger("elasticsearch")
- es_logger.setLevel(app.config.get("LOG_LEVEL"))
- es_logger.addHandler(
- logging.FileHandler("/tmp/es_TestRegistrationInfo.log"))
- es_trace_logger = logging.getLogger("elasticsearch.trace")
- es_trace_logger.addHandler(
- logging.FileHandler("/tmp/es_TestRegistrationTrace.log"))
-
- def tearDown(self):
- from time import sleep
- self.es.delete_by_query(
- index="users"
- , doc_type="local"
- , body={"query":{"match":{"email_address":"test@user.com"}}})
- sleep(1)
diff --git a/test/requests/test-website.py b/test/requests/test-website.py
index 8bfb47c2..d619a7d5 100755
--- a/test/requests/test-website.py
+++ b/test/requests/test-website.py
@@ -43,7 +43,6 @@ def dummy(args_obj, parser):
def integration_tests(args_obj, parser):
gn2_url = args_obj.host
- es_url = app.config.get("ELASTICSEARCH_HOST")+":"+str(app.config.get("ELASTICSEARCH_PORT"))
run_integration_tests(gn2_url, es_url)
def initTest(klass, gn2_url, es_url):
diff --git a/test/requests/test_forgot_password.py b/test/requests/test_forgot_password.py
index 346524bc..65b061f8 100644
--- a/test/requests/test_forgot_password.py
+++ b/test/requests/test_forgot_password.py
@@ -1,25 +1,22 @@
import requests
-from utility.elasticsearch_tools import get_user_by_unique_column
from parameterized import parameterized
from parametrized_test import ParametrizedTest
passwork_reset_link = ''
forgot_password_page = None
-class TestForgotPassword(ParametrizedTest):
+class TestForgotPassword(ParametrizedTest):
def setUp(self):
super(TestForgotPassword, self).setUp()
self.forgot_password_url = self.gn2_url+"/n/forgot_password_submit"
+
def send_email(to_addr, msg, fromaddr="no-reply@genenetwork.org"):
print("CALLING: send_email_mock()")
email_data = {
- "to_addr": to_addr
- , "msg": msg
- , "fromaddr": from_addr}
+ "to_addr": to_addr, "msg": msg, "fromaddr": from_addr}
data = {
- "es_connection": self.es,
"email_address": "test@user.com",
"full_name": "Test User",
"organization": "Test Organisation",
@@ -27,24 +24,12 @@ class TestForgotPassword(ParametrizedTest):
"password_confirm": "test_password"
}
-
def testWithoutEmail(self):
data = {"email_address": ""}
- error_notification = '<div class="alert alert-danger">You MUST provide an email</div>'
+ error_notification = ('<div class="alert alert-danger">'
+ 'You MUST provide an email</div>')
result = requests.post(self.forgot_password_url, data=data)
self.assertEqual(result.url, self.gn2_url+"/n/forgot_password")
self.assertTrue(
- result.content.find(error_notification) >= 0
- , "Error message should be displayed but was not")
-
- def testWithNonExistingEmail(self):
- # Monkey patching doesn't work, so simply test that getting by email
- # returns the correct data
- user = get_user_by_unique_column(self.es, "email_address", "non-existent@domain.com")
- self.assertTrue(user is None, "Should not find non-existent user")
-
- def testWithExistingEmail(self):
- # Monkey patching doesn't work, so simply test that getting by email
- # returns the correct data
- user = get_user_by_unique_column(self.es, "email_address", "test@user.com")
- self.assertTrue(user is not None, "Should find user")
+ result.content.find(error_notification) >= 0,
+ "Error message should be displayed but was not")
diff --git a/test/requests/test_registration.py b/test/requests/test_registration.py
index 0047e8a6..5d08bf58 100644
--- a/test/requests/test_registration.py
+++ b/test/requests/test_registration.py
@@ -1,31 +1,25 @@
import sys
import requests
-from parametrized_test import ParametrizedTest
class TestRegistration(ParametrizedTest):
- def tearDown(self):
- for item in self.es_cleanup:
- self.es.delete(index="users", doc_type="local", id=item["_id"])
def testRegistrationPage(self):
- if self.es.ping():
- data = {
- "email_address": "test@user.com",
- "full_name": "Test User",
- "organization": "Test Organisation",
- "password": "test_password",
- "password_confirm": "test_password"
- }
- requests.post(self.gn2_url+"/n/register", data)
- response = self.es.search(
- index="users"
- , doc_type="local"
- , body={
- "query": {"match": {"email_address": "test@user.com"}}})
- self.assertEqual(len(response["hits"]["hits"]), 1)
- else:
- self.skipTest("The elasticsearch server is down")
+ data = {
+ "email_address": "test@user.com",
+ "full_name": "Test User",
+ "organization": "Test Organisation",
+ "password": "test_password",
+ "password_confirm": "test_password"
+ }
+ requests.post(self.gn2_url+"/n/register", data)
+ response = self.es.search(
+ index="users"
+ , doc_type="local"
+ , body={
+ "query": {"match": {"email_address": "test@user.com"}}})
+ self.assertEqual(len(response["hits"]["hits"]), 1)
+
def main(gn2, es):
import unittest
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index 0cc963e5..32780ca6 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -5,14 +5,10 @@ import urllib.parse
import numpy as np
import pandas as pd
-from elasticsearch import Elasticsearch, TransportError
-from elasticsearch.helpers import bulk
from flask import Flask, g, request
from wqflask import app
-from utility.elasticsearch_tools import get_elasticsearch_connection
-from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI
def parse_db_uri():
@@ -106,20 +102,6 @@ if __name__ == '__main__':
Conn = MySQLdb.Connect(**parse_db_uri())
Cursor = Conn.cursor()
- # es = Elasticsearch([{
- # "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
- # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
-
- es = get_elasticsearch_connection(for_user=False)
-
- #input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
- #input_df = create_dataframe(input_filename)
- #output_df = quantileNormalize(input_df)
-
- #output_df.to_csv('quant_norm.csv', sep='\t')
-
- #out_filename = sys.argv[1][:-4] + '_quantnorm.txt'
-
success, _ = bulk(es, set_data(sys.argv[1]))
response = es.search(
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
deleted file mode 100644
index eae3ba03..00000000
--- a/wqflask/utility/elasticsearch_tools.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Elasticsearch support
-#
-# Some helpful commands to view the database:
-#
-# You can test the server being up with
-#
-# curl -H 'Content-Type: application/json' http://localhost:9200
-#
-# List all indices
-#
-# curl -H 'Content-Type: application/json' 'localhost:9200/_cat/indices?v'
-#
-# To see the users index 'table'
-#
-# curl http://localhost:9200/users
-#
-# To list all user ids
-#
-# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d '
-# {
-# "query" : {
-# "match_all" : {}
-# },
-# "stored_fields": []
-# }'
-#
-# To view a record
-#
-# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d '
-# {
-# "query" : {
-# "match" : { "email_address": "pjotr2017@thebird.nl"}
-# }
-# }'
-#
-#
-# To delete the users index and data (dangerous!)
-#
-# curl -XDELETE -H 'Content-Type: application/json' 'localhost:9200/users'
-
-
-from elasticsearch import Elasticsearch, TransportError
-import logging
-
-from utility.logger import getLogger
-logger = getLogger(__name__)
-
-from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT
-
-
-def test_elasticsearch_connection():
- es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + \
- ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True)
- if not es.ping():
- logger.warning("Elasticsearch is DOWN")
-
-
-def get_elasticsearch_connection(for_user=True):
- """Return a connection to ES. Returns None on failure"""
- logger.info("get_elasticsearch_connection")
- es = None
- try:
- assert(ELASTICSEARCH_HOST)
- assert(ELASTICSEARCH_PORT)
- logger.info("ES HOST", ELASTICSEARCH_HOST)
-
- es = Elasticsearch([{
- "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
- }], timeout=30, retry_on_timeout=True) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
-
- if for_user:
- setup_users_index(es)
-
- es_logger = logging.getLogger("elasticsearch")
- es_logger.setLevel(logging.INFO)
- es_logger.addHandler(logging.NullHandler())
- except Exception as e:
- logger.error("Failed to get elasticsearch connection", e)
- es = None
-
- return es
-
-
-def setup_users_index(es_connection):
- if es_connection:
- index_settings = {
- "properties": {
- "email_address": {
- "type": "keyword"}}}
-
- es_connection.indices.create(index='users', ignore=400)
- es_connection.indices.put_mapping(
- body=index_settings, index="users", doc_type="local")
-
-
-def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"):
- return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type)
-
-
-def save_user(es, user, user_id):
- es_save_data(es, "users", "local", user, user_id)
-
-
-def get_item_by_unique_column(es, column_name, column_value, index, doc_type):
- item_details = None
- try:
- response = es.search(
- index=index, doc_type=doc_type, body={
- "query": {"match": {column_name: column_value}}
- })
- if len(response["hits"]["hits"]) > 0:
- item_details = response["hits"]["hits"][0]["_source"]
- except TransportError as te:
- pass
- return item_details
-
-
-def es_save_data(es, index, doc_type, data_item, data_id,):
- from time import sleep
- es.create(index, doc_type, body=data_item, id=data_id)
- sleep(1) # Delay 1 second to allow indexing
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index 0efe8ca9..f28961ec 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -287,6 +287,7 @@ JS_GN_PATH = get_setting('JS_GN_PATH')
GITHUB_CLIENT_ID = get_setting('GITHUB_CLIENT_ID')
GITHUB_CLIENT_SECRET = get_setting('GITHUB_CLIENT_SECRET')
+GITHUB_AUTH_URL = ""
if GITHUB_CLIENT_ID != 'UNKNOWN' and GITHUB_CLIENT_SECRET:
GITHUB_AUTH_URL = "https://github.com/login/oauth/authorize?client_id=" + \
GITHUB_CLIENT_ID + "&client_secret=" + GITHUB_CLIENT_SECRET
@@ -301,10 +302,6 @@ if ORCID_CLIENT_ID != 'UNKNOWN' and ORCID_CLIENT_SECRET:
"&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2"
ORCID_TOKEN_URL = get_setting('ORCID_TOKEN_URL')
-ELASTICSEARCH_HOST = get_setting('ELASTICSEARCH_HOST')
-ELASTICSEARCH_PORT = get_setting('ELASTICSEARCH_PORT')
-# import utility.elasticsearch_tools as es
-# es.test_elasticsearch_connection()
SMTP_CONNECT = get_setting('SMTP_CONNECT')
SMTP_USERNAME = get_setting('SMTP_USERNAME')
diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py
index 67e2e158..d3c4a62f 100644
--- a/wqflask/wqflask/user_session.py
+++ b/wqflask/wqflask/user_session.py
@@ -10,7 +10,6 @@ from flask import (Flask, g, render_template, url_for, request, make_response,
from wqflask import app
from utility import hmac
-#from utility.elasticsearch_tools import get_elasticsearch_connection
from utility.redis_tools import get_redis_conn, get_user_id, get_user_by_unique_column, set_user_attribute, get_user_collections, save_collections
Redis = get_redis_conn()