From 1f5ee0e2070025e1921caee3fbd617aec201e18d Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 30 Sep 2024 20:27:11 +0300 Subject: Spin up a local virtuoso instance to run RDF tests against. Signed-off-by: Munyoki Kilyungi --- tests/fixtures/rdf.py | 132 ++++++++++++++++++++++++----------------- tests/unit/db/rdf/test_wiki.py | 8 +-- 2 files changed, 82 insertions(+), 58 deletions(-) diff --git a/tests/fixtures/rdf.py b/tests/fixtures/rdf.py index 028ef8d..38d8f25 100644 --- a/tests/fixtures/rdf.py +++ b/tests/fixtures/rdf.py @@ -1,71 +1,95 @@ """Test fixtures to set up a test named graph for loading RDF data.""" import os -from flask import config +import tempfile +import subprocess +from string import Template + +import psutil # type: ignore import pytest import requests from requests.auth import HTTPDigestAuth +from tests.fixtures.virtuoso import VIRTUOSO_INI_FILE + + +SPARQL_CONF = { + "sparql_user": "dba", + "sparql_password": "dba", + "sparql_auth_uri": "http://localhost:8191/sparql-auth/", + "sparql_crud_auth_uri": "http://localhost:8191/sparql-graph-crud-auth/", + "sparql_endpoint": "http://localhost:8191/sparql/", +} -def get_sparql_auth_conf() -> dict: - """Fetch SPARQL auth configuration from the configurafrom flask - import configuration object.""" - # When loading from the environment, GN3_CONF precedes - # GN3_SECRETS. Don't change this order. - sparql_conf = config.Config("") - if os.environ.get("GN3_CONF"): - # Check whether GN3_CONF has been set, and ignore GN3_CONF - # otherwise. In CD, we use a mixed-text file, so we don't - # have an explicit PATH to point this to. - # https://git.genenetwork.org/gn-machines/tree/genenetwork-development.scm#n517 - sparql_conf.from_envvar("GN3_CONF") - # Set sane defaults for GN3_SECRETS to CD's secret file. In CD, - # this file is set in the genenetwork3 cd gexp: - # https://git.genenetwork.org/gn-machines/tree/genenetwork-development.scm#n516 - # However, during testing GN3_SECRETS isn't set; and by default, - # we run guix's default tests for python projects: `pytest` - # https://git.genenetwork.org/guix-bioinformatics/tree/gn/packages/genenetwork.scm#n182 - if os.environ.get("GN3_SECRETS"): - sparql_conf.from_envvar("GN3_SECRETS") - # If the sparql configurations aren't loaded, set sane defaults. - # This way, the genenetwork3 package builds. - return { - "sparql_user": sparql_conf.get("SPARQL_USER", "dba"), - "sparql_auth_uri": sparql_conf.get( - "SPARQL_AUTH_URI", "http://localhost:8890/sparql-auth/" - ), - "sparql_crud_auth_uri": sparql_conf.get( - "SPARQL_CRUD_AUTH_URI", "http://localhost:8890/sparql-graph-crud-auth" - ), - "sparql_endpoint": sparql_conf.get("SPARQL_ENDPOINT", "http://localhost:8890"), - "sparql_password": sparql_conf.get("SPARQL_PASSWORD", "dba"), - } +def get_process_id(name) -> list: + """Return process ids found by (partial) name or regex. -# XXXX: Currently we run the tests against CD's virtuoso instance. -# This is not idempotent. Consider having a special virtuoso instance -# just for running tests. -@pytest.fixture(scope="module") + >>> get_process_id('kthreadd') + [2] + >>> get_process_id('watchdog') + [10, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61] # ymmv + >>> get_process_id('non-existent process') + [] + """ + with subprocess.Popen( + ["pgrep", "-f", name], stdout=subprocess.PIPE, shell=False + ) as proc: + response = proc.communicate()[0] + return [int(pid) for pid in response.split()] + + +@pytest.fixture(scope="session") def rdf_setup(): """Upload RDF to a Virtuoso named graph""" - # Define the URL and file - sparql_conf = get_sparql_auth_conf() - url = sparql_conf["sparql_crud_auth_uri"] + dir_path = os.path.dirname(__file__).split("fixtures")[0] file_path = os.path.join( - os.path.dirname(__file__).split("fixtures")[0], + dir_path, "test_data/ttl-files/test-data.ttl", ) + # We intentionally use a temporary directory. This way, all the + # database created by virtuoso are cleaned after running tests. + with tempfile.TemporaryDirectory() as tmpdirname: + init_file = os.path.join(tmpdirname, "virtuoso.ini") + # Create the virtuoso init file which we use when + # bootstrapping virtuoso. + with open(init_file, "w", encoding="utf-8") as file_: + file_.write(Template(VIRTUOSO_INI_FILE).substitute( + dir_path=tmpdirname)) + # Here we intentionally ignore the "+foreground" option to + # allow virtuoso to run in the background. + with subprocess.Popen( + [ + "virtuoso-t", + "+wait", + "+no-checkpoint", + "+configfile", + init_file, + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) as pid: + pid.wait() + # Define the query parameters and authentication + params = {"graph": "http://cd-test.genenetwork.org"} + auth = HTTPDigestAuth("dba", "dba") - # Define the query parameters and authentication - params = {"graph": "http://cd-test.genenetwork.org"} - auth = HTTPDigestAuth( - sparql_conf["sparql_user"], sparql_conf["sparql_password"]) - - # Make sure this graph does not exist before running anything - requests.delete(url, params=params, auth=auth) + # Make sure this graph does not exist before running anything + requests.delete( + SPARQL_CONF["sparql_crud_auth_uri"], params=params, auth=auth + ) - # Open the file in binary mode and send the request - with open(file_path, "rb") as file: - response = requests.put(url, params=params, auth=auth, data=file) - yield response - requests.delete(url, params=params, auth=auth) + # Open the file in binary mode and send the request + with open(file_path, "rb") as file: + response = requests.put( + SPARQL_CONF["sparql_crud_auth_uri"], + params=params, + auth=auth, + data=file, + ) + yield response + requests.delete( + SPARQL_CONF["sparql_crud_auth_uri"], params=params, auth=auth + ) + for pid_ in get_process_id(init_file): + psutil.Process(pid_).kill() diff --git a/tests/unit/db/rdf/test_wiki.py b/tests/unit/db/rdf/test_wiki.py index 8dd4f3a..3abf3ad 100644 --- a/tests/unit/db/rdf/test_wiki.py +++ b/tests/unit/db/rdf/test_wiki.py @@ -19,7 +19,7 @@ import pytest # pylint: disable=W0611 from tests.fixtures.rdf import ( rdf_setup, - get_sparql_auth_conf + SPARQL_CONF, ) from gn3.db.rdf.wiki import ( @@ -187,7 +187,7 @@ def test_sanitize_result(result, expected): @pytest.mark.rdf def test_get_wiki_entries_by_symbol(rdf_setup): # pylint: disable=W0613,W0621 """Test that wiki entries are fetched correctly by symbol""" - sparql_conf = get_sparql_auth_conf() + sparql_conf = SPARQL_CONF result = get_wiki_entries_by_symbol( symbol="ckb", sparql_uri=sparql_conf["sparql_endpoint"], @@ -262,7 +262,7 @@ and C1QL3 (CTRP13).", @pytest.mark.rdf def test_get_comment_history(rdf_setup): # pylint: disable=W0613,W0621 """Test fetching a comment's history from RDF""" - sparql_conf = get_sparql_auth_conf() + sparql_conf = SPARQL_CONF result = get_comment_history( comment_id=1158, sparql_uri=sparql_conf["sparql_endpoint"], @@ -355,7 +355,7 @@ Possible 3' UTR variants.", @pytest.mark.rdf def test_update_wiki_comment(rdf_setup): # pylint: disable=W0613,W0621 """Test that a comment is updated correctly""" - sparql_conf = get_sparql_auth_conf() + sparql_conf = SPARQL_CONF update_wiki_comment( insert_dict={ "Id": 230, -- cgit v1.2.3