about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x bin/genenetwork2 7
-rw-r--r-- doc/elasticsearch.org 247
-rw-r--r-- test/requests/parametrized_test.py 32
-rwxr-xr-x test/requests/test-website.py 1
-rw-r--r-- test/requests/test_forgot_password.py 29
-rw-r--r-- test/requests/test_registration.py 36
-rw-r--r-- wqflask/maintenance/quantile_normalize.py 18
-rw-r--r-- wqflask/utility/elasticsearch_tools.py 121
-rw-r--r-- wqflask/utility/tools.py 5
-rw-r--r-- wqflask/wqflask/user_session.py 1
10 files changed, 23 insertions, 474 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 2b94b2a2..5f714d2e 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -101,13 +101,6 @@ fi
 export GN2_SETTINGS=$settings     # Python
 echo GN2_SETTINGS=$settings
 
-# This is a temporary hack to inject ES - should have added python2-elasticsearch package to guix instead
-# if [ -z $ELASTICSEARCH_PROFILE ]; then
-#     echo -e "WARNING: Elastic Search profile has not been set - use ELASTICSEARCH_PROFILE";
-# else
-#     PYTHONPATH="$PYTHONPATH${PYTHONPATH:+:}$ELASTICSEARCH_PROFILE/lib/python3.8/site-packages"
-# fi
-
 if [ -z $GN2_PROFILE ] ; then
     echo "WARNING: GN2_PROFILE has not been set - you need the environment, so I hope you know what you are doing!"
     export GN2_PROFILE=$(dirname $(dirname $(which genenetwork2)))
diff --git a/doc/elasticsearch.org b/doc/elasticsearch.org
deleted file mode 100644
index 864a8363..00000000
--- a/doc/elasticsearch.org
+++ /dev/null
@@ -1,247 +0,0 @@
-* Elasticsearch
-
-** Introduction
-
-GeneNetwork uses elasticsearch (ES) for all things considered
-'state'. One example is user collections, another is user management.
-
-** Example
-
-To get the right environment, first you can get a python REPL with something like
-
-: env GN2_PROFILE=~/opt/gn-latest ./bin/genenetwork2 ../etc/default_settings.py -cli python
-
-(make sure to use the correct GN2_PROFILE!)
-
-Next try
-
-#+BEGIN_SRC python
-
-from elasticsearch import Elasticsearch, TransportError
-
-es = Elasticsearch([{ "host": 'localhost', "port": '9200' }])
-
-# Dump all data
-
-es.search("*")
-
-# To fetch an E-mail record from the users index
-
-record = es.search(
-            index = 'users', doc_type = 'local', body = {
-                "query": { "match": { "email_address": "myname@email.com" } }
-            })
-
-# It is also possible to do wild card matching
-
-q = { "query": { "wildcard" : { "full_name" : "pjot*" } }}
-es.search(index = 'users', doc_type = 'local', body = q)
-
-# To get elements from that record:
-
-record['hits']['hits'][0][u'_source']['full_name']
-u'Pjotr'
-
-record['hits']['hits'][0][u'_source']['email_address']
-u"myname@email.com"
-
-#+END_SRC
-
-** Health
-
-ES provides support for checking its health:
-
-: curl -XGET http://localhost:9200/_cluster/health?pretty=true
-
-#+BEGIN_SRC json
-
-
-    {
-      "cluster_name" : "asgard",
-      "status" : "yellow",
-      "timed_out" : false,
-      "number_of_nodes" : 1,
-      "number_of_data_nodes" : 1,
-      "active_primary_shards" : 5,
-      "active_shards" : 5,
-      "relocating_shards" : 0,
-      "initializing_shards" : 0,
-      "unassigned_shards" : 5
-    }
-
-#+END_SRC
-
-Yellow means just one instance is running (no worries).
-
-To get full cluster info
-
-: curl -XGET "localhost:9200/_cluster/stats?human&pretty"
-
-#+BEGIN_SRC json
-{
-  "_nodes" : {
-    "total" : 1,
-    "successful" : 1,
-    "failed" : 0
-  },
-  "cluster_name" : "elasticsearch",
-  "timestamp" : 1529050366452,
-  "status" : "yellow",
-  "indices" : {
-    "count" : 3,
-    "shards" : {
-      "total" : 15,
-      "primaries" : 15,
-      "replication" : 0.0,
-      "index" : {
-        "shards" : {
-          "min" : 5,
-          "max" : 5,
-          "avg" : 5.0
-        },
-        "primaries" : {
-          "min" : 5,
-          "max" : 5,
-          "avg" : 5.0
-        },
-        "replication" : {
-          "min" : 0.0,
-          "max" : 0.0,
-          "avg" : 0.0
-        }
-      }
-    },
-    "docs" : {
-      "count" : 14579,
-      "deleted" : 0
-    },
-    "store" : {
-      "size" : "44.7mb",
-      "size_in_bytes" : 46892794
-    },
-    "fielddata" : {
-      "memory_size" : "0b",
-      "memory_size_in_bytes" : 0,
-      "evictions" : 0
-    },
-    "query_cache" : {
-      "memory_size" : "0b",
-      "memory_size_in_bytes" : 0,
-      "total_count" : 0,
-      "hit_count" : 0,
-      "miss_count" : 0,
-      "cache_size" : 0,
-      "cache_count" : 0,
-      "evictions" : 0
-    },
-    "completion" : {
-      "size" : "0b",
-      "size_in_bytes" : 0
-    },
-    "segments" : {
-      "count" : 24,
-      "memory" : "157.3kb",
-      "memory_in_bytes" : 161112,
-      "terms_memory" : "122.6kb",
-      "terms_memory_in_bytes" : 125569,
-      "stored_fields_memory" : "15.3kb",
-      "stored_fields_memory_in_bytes" : 15728,
-      "term_vectors_memory" : "0b",
-      "term_vectors_memory_in_bytes" : 0,
-      "norms_memory" : "10.8kb",
-      "norms_memory_in_bytes" : 11136,
-      "points_memory" : "111b",
-      "points_memory_in_bytes" : 111,
-      "doc_values_memory" : "8.3kb",
-      "doc_values_memory_in_bytes" : 8568,
-      "index_writer_memory" : "0b",
-      "index_writer_memory_in_bytes" : 0,
-      "version_map_memory" : "0b",
-      "version_map_memory_in_bytes" : 0,
-      "fixed_bit_set" : "0b",
-      "fixed_bit_set_memory_in_bytes" : 0,
-      "max_unsafe_auto_id_timestamp" : -1,
-      "file_sizes" : { }
-    }
-  },
-  "nodes" : {
-    "count" : {
-      "total" : 1,
-      "data" : 1,
-      "coordinating_only" : 0,
-      "master" : 1,
-      "ingest" : 1
-    },
-    "versions" : [
-      "6.2.1"
-    ],
-    "os" : {
-      "available_processors" : 16,
-      "allocated_processors" : 16,
-      "names" : [
-        {
-          "name" : "Linux",
-          "count" : 1
-        }
-      ],
-      "mem" : {
-        "total" : "125.9gb",
-        "total_in_bytes" : 135189286912,
-        "free" : "48.3gb",
-        "free_in_bytes" : 51922628608,
-        "used" : "77.5gb",
-        "used_in_bytes" : 83266658304,
-        "free_percent" : 38,
-        "used_percent" : 62
-      }
-    },
-    "process" : {
-      "cpu" : {
-        "percent" : 0
-      },
-      "open_file_descriptors" : {
-        "min" : 415,
-        "max" : 415,
-        "avg" : 415
-      }
-    },
-    "jvm" : {
-      "max_uptime" : "1.9d",
-      "max_uptime_in_millis" : 165800616,
-      "versions" : [
-        {
-          "version" : "9.0.4",
-          "vm_name" : "OpenJDK 64-Bit Server VM",
-          "vm_version" : "9.0.4+11",
-          "vm_vendor" : "Oracle Corporation",
-          "count" : 1
-        }
-      ],
-      "mem" : {
-        "heap_used" : "1.1gb",
-        "heap_used_in_bytes" : 1214872032,
-        "heap_max" : "23.8gb",
-        "heap_max_in_bytes" : 25656426496
-      },
-      "threads" : 110
-    },
-    "fs" : {
-      "total" : "786.4gb",
-      "total_in_bytes" : 844400918528,
-      "free" : "246.5gb",
-      "free_in_bytes" : 264688160768,
-      "available" : "206.5gb",
-      "available_in_bytes" : 221771468800
-    },
-    "plugins" : [ ],
-    "network_types" : {
-      "transport_types" : {
-        "netty4" : 1
-      },
-      "http_types" : {
-        "netty4" : 1
-      }
-    }
-  }
-}
-#+BEGIN_SRC json
diff --git a/test/requests/parametrized_test.py b/test/requests/parametrized_test.py
deleted file mode 100644
index 50003850..00000000
--- a/test/requests/parametrized_test.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import logging
-import unittest
-from wqflask import app
-from utility.elasticsearch_tools import get_elasticsearch_connection, get_user_by_unique_column
-from elasticsearch import Elasticsearch, TransportError
-
-class ParametrizedTest(unittest.TestCase):
-
-    def __init__(self, methodName='runTest', gn2_url="http://localhost:5003", es_url="localhost:9200"):
-        super(ParametrizedTest, self).__init__(methodName=methodName)
-        self.gn2_url = gn2_url
-        self.es_url = es_url
-
-    def setUp(self):
-        self.es = get_elasticsearch_connection()
-        self.es_cleanup = []
-
-        es_logger = logging.getLogger("elasticsearch")
-        es_logger.setLevel(app.config.get("LOG_LEVEL"))
-        es_logger.addHandler(
-            logging.FileHandler("/tmp/es_TestRegistrationInfo.log"))
-        es_trace_logger = logging.getLogger("elasticsearch.trace")
-        es_trace_logger.addHandler(
-            logging.FileHandler("/tmp/es_TestRegistrationTrace.log"))
-
-    def tearDown(self):
-        from time import sleep
-        self.es.delete_by_query(
-            index="users"
-            , doc_type="local"
-            , body={"query":{"match":{"email_address":"test@user.com"}}})
-        sleep(1)
diff --git a/test/requests/test-website.py b/test/requests/test-website.py
index 8bfb47c2..d619a7d5 100755
--- a/test/requests/test-website.py
+++ b/test/requests/test-website.py
@@ -43,7 +43,6 @@ def dummy(args_obj, parser):
 
 def integration_tests(args_obj, parser):
     gn2_url = args_obj.host
-    es_url = app.config.get("ELASTICSEARCH_HOST")+":"+str(app.config.get("ELASTICSEARCH_PORT"))
     run_integration_tests(gn2_url, es_url)
 
 def initTest(klass, gn2_url, es_url):
diff --git a/test/requests/test_forgot_password.py b/test/requests/test_forgot_password.py
index 346524bc..65b061f8 100644
--- a/test/requests/test_forgot_password.py
+++ b/test/requests/test_forgot_password.py
@@ -1,25 +1,22 @@
 import requests
-from utility.elasticsearch_tools import get_user_by_unique_column
 from parameterized import parameterized
 from parametrized_test import ParametrizedTest
 
 passwork_reset_link = ''
 forgot_password_page = None
 
-class TestForgotPassword(ParametrizedTest):
 
+class TestForgotPassword(ParametrizedTest):
     def setUp(self):
         super(TestForgotPassword, self).setUp()
         self.forgot_password_url = self.gn2_url+"/n/forgot_password_submit"
+
         def send_email(to_addr, msg, fromaddr="no-reply@genenetwork.org"):
             print("CALLING: send_email_mock()")
             email_data = {
-                "to_addr": to_addr
-                , "msg": msg
-                , "fromaddr": from_addr}
+                "to_addr": to_addr, "msg": msg, "fromaddr": from_addr}
 
         data = {
-            "es_connection": self.es,
             "email_address": "test@user.com",
             "full_name": "Test User",
             "organization": "Test Organisation",
@@ -27,24 +24,12 @@ class TestForgotPassword(ParametrizedTest):
             "password_confirm": "test_password"
         }
 
-
     def testWithoutEmail(self):
         data = {"email_address": ""}
-        error_notification = '<div class="alert alert-danger">You MUST provide an email</div>'
+        error_notification = ('<div class="alert alert-danger">'
+                              'You MUST provide an email</div>')
         result = requests.post(self.forgot_password_url, data=data)
         self.assertEqual(result.url, self.gn2_url+"/n/forgot_password")
         self.assertTrue(
-            result.content.find(error_notification) >= 0
-            , "Error message should be displayed but was not")
-
-    def testWithNonExistingEmail(self):
-        # Monkey patching doesn't work, so simply test that getting by email
-        # returns the correct data
-        user = get_user_by_unique_column(self.es, "email_address", "non-existent@domain.com")
-        self.assertTrue(user is None, "Should not find non-existent user")
-
-    def testWithExistingEmail(self):
-        # Monkey patching doesn't work, so simply test that getting by email
-        # returns the correct data
-        user = get_user_by_unique_column(self.es, "email_address", "test@user.com")
-        self.assertTrue(user is not None, "Should find user")
+            result.content.find(error_notification) >= 0,
+            "Error message should be displayed but was not")
diff --git a/test/requests/test_registration.py b/test/requests/test_registration.py
index 0047e8a6..5d08bf58 100644
--- a/test/requests/test_registration.py
+++ b/test/requests/test_registration.py
@@ -1,31 +1,25 @@
 import sys
 import requests
-from parametrized_test import ParametrizedTest
 
 class TestRegistration(ParametrizedTest):
 
-    def tearDown(self):
-        for item in self.es_cleanup:
-            self.es.delete(index="users", doc_type="local", id=item["_id"])
 
     def testRegistrationPage(self):
-        if self.es.ping():
-            data = {
-                "email_address": "test@user.com",
-                "full_name": "Test User",
-                "organization": "Test Organisation",
-                "password": "test_password",
-                "password_confirm": "test_password"
-            }
-            requests.post(self.gn2_url+"/n/register", data)
-            response = self.es.search(
-                index="users"
-                , doc_type="local"
-                , body={
-                    "query": {"match": {"email_address": "test@user.com"}}})
-            self.assertEqual(len(response["hits"]["hits"]), 1)
-        else:
-            self.skipTest("The elasticsearch server is down")
+        data = {
+            "email_address": "test@user.com",
+            "full_name": "Test User",
+            "organization": "Test Organisation",
+            "password": "test_password",
+            "password_confirm": "test_password"
+        }
+        requests.post(self.gn2_url+"/n/register", data)
+        response = self.es.search(
+            index="users"
+            , doc_type="local"
+            , body={
+                "query": {"match": {"email_address": "test@user.com"}}})
+        self.assertEqual(len(response["hits"]["hits"]), 1)
+
 
 def main(gn2, es):
     import unittest
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index 0cc963e5..32780ca6 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -5,14 +5,10 @@ import urllib.parse
 
 import numpy as np
 import pandas as pd
-from elasticsearch import Elasticsearch, TransportError
-from elasticsearch.helpers import bulk
 
 from flask import Flask, g, request
 
 from wqflask import app
-from utility.elasticsearch_tools import get_elasticsearch_connection
-from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, SQL_URI
 
 
 def parse_db_uri():
@@ -106,20 +102,6 @@ if __name__ == '__main__':
     Conn = MySQLdb.Connect(**parse_db_uri())
     Cursor = Conn.cursor()
 
-    # es = Elasticsearch([{
-    #    "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
-    # }], timeout=60) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
-
-    es = get_elasticsearch_connection(for_user=False)
-
-    #input_filename = "/home/zas1024/cfw_data/" + sys.argv[1] + ".txt"
-    #input_df = create_dataframe(input_filename)
-    #output_df = quantileNormalize(input_df)
-
-    #output_df.to_csv('quant_norm.csv', sep='\t')
-
-    #out_filename = sys.argv[1][:-4] + '_quantnorm.txt'
-
     success, _ = bulk(es, set_data(sys.argv[1]))
 
     response = es.search(
diff --git a/wqflask/utility/elasticsearch_tools.py b/wqflask/utility/elasticsearch_tools.py
deleted file mode 100644
index eae3ba03..00000000
--- a/wqflask/utility/elasticsearch_tools.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Elasticsearch support
-#
-# Some helpful commands to view the database:
-#
-# You can test the server being up with
-#
-#   curl -H 'Content-Type: application/json' http://localhost:9200
-#
-# List all indices
-#
-#   curl -H 'Content-Type: application/json' 'localhost:9200/_cat/indices?v'
-#
-# To see the users index 'table'
-#
-#   curl http://localhost:9200/users
-#
-# To list all user ids
-#
-# curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d '
-# {
-#     "query" : {
-#         "match_all" : {}
-#     },
-#     "stored_fields": []
-# }'
-#
-# To view a record
-#
-#   curl -H 'Content-Type: application/json' http://localhost:9200/users/local/_search?pretty=true -d '
-#   {
-#     "query" : {
-#       "match" : { "email_address": "pjotr2017@thebird.nl"}
-#     }
-#   }'
-#
-#
-# To delete the users index and data (dangerous!)
-#
-#   curl -XDELETE -H 'Content-Type: application/json' 'localhost:9200/users'
-
-
-from elasticsearch import Elasticsearch, TransportError
-import logging
-
-from utility.logger import getLogger
-logger = getLogger(__name__)
-
-from utility.tools import ELASTICSEARCH_HOST, ELASTICSEARCH_PORT
-
-
-def test_elasticsearch_connection():
-    es = Elasticsearch(['http://' + ELASTICSEARCH_HOST + \
-                        ":" + str(ELASTICSEARCH_PORT) + '/'], verify_certs=True)
-    if not es.ping():
-        logger.warning("Elasticsearch is DOWN")
-
-
-def get_elasticsearch_connection(for_user=True):
-    """Return a connection to ES. Returns None on failure"""
-    logger.info("get_elasticsearch_connection")
-    es = None
-    try:
-        assert(ELASTICSEARCH_HOST)
-        assert(ELASTICSEARCH_PORT)
-        logger.info("ES HOST", ELASTICSEARCH_HOST)
-
-        es = Elasticsearch([{
-            "host": ELASTICSEARCH_HOST, "port": ELASTICSEARCH_PORT
-        }], timeout=30, retry_on_timeout=True) if (ELASTICSEARCH_HOST and ELASTICSEARCH_PORT) else None
-
-        if for_user:
-            setup_users_index(es)
-
-        es_logger = logging.getLogger("elasticsearch")
-        es_logger.setLevel(logging.INFO)
-        es_logger.addHandler(logging.NullHandler())
-    except Exception as e:
-        logger.error("Failed to get elasticsearch connection", e)
-        es = None
-
-    return es
-
-
-def setup_users_index(es_connection):
-    if es_connection:
-        index_settings = {
-            "properties": {
-                "email_address": {
-                    "type": "keyword"}}}
-
-        es_connection.indices.create(index='users', ignore=400)
-        es_connection.indices.put_mapping(
-            body=index_settings, index="users", doc_type="local")
-
-
-def get_user_by_unique_column(es, column_name, column_value, index="users", doc_type="local"):
-    return get_item_by_unique_column(es, column_name, column_value, index=index, doc_type=doc_type)
-
-
-def save_user(es, user, user_id):
-    es_save_data(es, "users", "local", user, user_id)
-
-
-def get_item_by_unique_column(es, column_name, column_value, index, doc_type):
-    item_details = None
-    try:
-        response = es.search(
-            index=index, doc_type=doc_type, body={
-                "query": {"match": {column_name: column_value}}
-            })
-        if len(response["hits"]["hits"]) > 0:
-            item_details = response["hits"]["hits"][0]["_source"]
-    except TransportError as te:
-        pass
-    return item_details
-
-
-def es_save_data(es, index, doc_type, data_item, data_id,):
-    from time import sleep
-    es.create(index, doc_type, body=data_item, id=data_id)
-    sleep(1)  # Delay 1 second to allow indexing
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index 0efe8ca9..f28961ec 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -287,6 +287,7 @@ JS_GN_PATH = get_setting('JS_GN_PATH')
 
 GITHUB_CLIENT_ID = get_setting('GITHUB_CLIENT_ID')
 GITHUB_CLIENT_SECRET = get_setting('GITHUB_CLIENT_SECRET')
+GITHUB_AUTH_URL = ""
 if GITHUB_CLIENT_ID != 'UNKNOWN' and GITHUB_CLIENT_SECRET:
     GITHUB_AUTH_URL = "https://github.com/login/oauth/authorize?client_id=" + \
                       GITHUB_CLIENT_ID + "&client_secret=" + GITHUB_CLIENT_SECRET
@@ -301,10 +302,6 @@ if ORCID_CLIENT_ID != 'UNKNOWN' and ORCID_CLIENT_SECRET:
         "&redirect_uri=" + GN2_BRANCH_URL + "n/login/orcid_oauth2"
     ORCID_TOKEN_URL = get_setting('ORCID_TOKEN_URL')
 
-ELASTICSEARCH_HOST = get_setting('ELASTICSEARCH_HOST')
-ELASTICSEARCH_PORT = get_setting('ELASTICSEARCH_PORT')
-# import utility.elasticsearch_tools as es
-# es.test_elasticsearch_connection()
 
 SMTP_CONNECT = get_setting('SMTP_CONNECT')
 SMTP_USERNAME = get_setting('SMTP_USERNAME')
diff --git a/wqflask/wqflask/user_session.py b/wqflask/wqflask/user_session.py
index 67e2e158..d3c4a62f 100644
--- a/wqflask/wqflask/user_session.py
+++ b/wqflask/wqflask/user_session.py
@@ -10,7 +10,6 @@ from flask import (Flask, g, render_template, url_for, request, make_response,
 from wqflask import app
 from utility import hmac
 
-#from utility.elasticsearch_tools import get_elasticsearch_connection
 from utility.redis_tools import get_redis_conn, get_user_id, get_user_by_unique_column, set_user_attribute, get_user_collections, save_collections
 Redis = get_redis_conn()