diff options
author | Munyoki Kilyungi | 2024-09-30 20:27:11 +0300 |
---|---|---|
committer | BonfaceKilz | 2024-10-02 06:59:11 +0300 |
commit | 1f5ee0e2070025e1921caee3fbd617aec201e18d (patch) | |
tree | 72a94396782fa862a2da8d4d85f2b41974d442f2 /tests/fixtures/rdf.py | |
parent | 3d7f34290ed3f50403e7fcc29af4b035256dd6cb (diff) | |
download | genenetwork3-1f5ee0e2070025e1921caee3fbd617aec201e18d.tar.gz |
Spin up a local virtuoso instance to run RDF tests against.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'tests/fixtures/rdf.py')
-rw-r--r-- | tests/fixtures/rdf.py | 132 |
1 files changed, 78 insertions, 54 deletions
diff --git a/tests/fixtures/rdf.py b/tests/fixtures/rdf.py index 028ef8d..38d8f25 100644 --- a/tests/fixtures/rdf.py +++ b/tests/fixtures/rdf.py @@ -1,71 +1,95 @@ """Test fixtures to set up a test named graph for loading RDF data.""" import os -from flask import config +import tempfile +import subprocess +from string import Template + +import psutil # type: ignore import pytest import requests from requests.auth import HTTPDigestAuth +from tests.fixtures.virtuoso import VIRTUOSO_INI_FILE + + +SPARQL_CONF = { + "sparql_user": "dba", + "sparql_password": "dba", + "sparql_auth_uri": "http://localhost:8191/sparql-auth/", + "sparql_crud_auth_uri": "http://localhost:8191/sparql-graph-crud-auth/", + "sparql_endpoint": "http://localhost:8191/sparql/", +} -def get_sparql_auth_conf() -> dict: - """Fetch SPARQL auth configuration from the configurafrom flask - import configuration object.""" - # When loading from the environment, GN3_CONF precedes - # GN3_SECRETS. Don't change this order. - sparql_conf = config.Config("") - if os.environ.get("GN3_CONF"): - # Check whether GN3_CONF has been set, and ignore GN3_CONF - # otherwise. In CD, we use a mixed-text file, so we don't - # have an explicit PATH to point this to. - # https://git.genenetwork.org/gn-machines/tree/genenetwork-development.scm#n517 - sparql_conf.from_envvar("GN3_CONF") - # Set sane defaults for GN3_SECRETS to CD's secret file. In CD, - # this file is set in the genenetwork3 cd gexp: - # https://git.genenetwork.org/gn-machines/tree/genenetwork-development.scm#n516 - # However, during testing GN3_SECRETS isn't set; and by default, - # we run guix's default tests for python projects: `pytest` - # https://git.genenetwork.org/guix-bioinformatics/tree/gn/packages/genenetwork.scm#n182 - if os.environ.get("GN3_SECRETS"): - sparql_conf.from_envvar("GN3_SECRETS") - # If the sparql configurations aren't loaded, set sane defaults. - # This way, the genenetwork3 package builds. - return { - "sparql_user": sparql_conf.get("SPARQL_USER", "dba"), - "sparql_auth_uri": sparql_conf.get( - "SPARQL_AUTH_URI", "http://localhost:8890/sparql-auth/" - ), - "sparql_crud_auth_uri": sparql_conf.get( - "SPARQL_CRUD_AUTH_URI", "http://localhost:8890/sparql-graph-crud-auth" - ), - "sparql_endpoint": sparql_conf.get("SPARQL_ENDPOINT", "http://localhost:8890"), - "sparql_password": sparql_conf.get("SPARQL_PASSWORD", "dba"), - } +def get_process_id(name) -> list: + """Return process ids found by (partial) name or regex. -# XXXX: Currently we run the tests against CD's virtuoso instance. -# This is not idempotent. Consider having a special virtuoso instance -# just for running tests. -@pytest.fixture(scope="module") + >>> get_process_id('kthreadd') + [2] + >>> get_process_id('watchdog') + [10, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61] # ymmv + >>> get_process_id('non-existent process') + [] + """ + with subprocess.Popen( + ["pgrep", "-f", name], stdout=subprocess.PIPE, shell=False + ) as proc: + response = proc.communicate()[0] + return [int(pid) for pid in response.split()] + + +@pytest.fixture(scope="session") def rdf_setup(): """Upload RDF to a Virtuoso named graph""" - # Define the URL and file - sparql_conf = get_sparql_auth_conf() - url = sparql_conf["sparql_crud_auth_uri"] + dir_path = os.path.dirname(__file__).split("fixtures")[0] file_path = os.path.join( - os.path.dirname(__file__).split("fixtures")[0], + dir_path, "test_data/ttl-files/test-data.ttl", ) + # We intentionally use a temporary directory. This way, all the + # database created by virtuoso are cleaned after running tests. + with tempfile.TemporaryDirectory() as tmpdirname: + init_file = os.path.join(tmpdirname, "virtuoso.ini") + # Create the virtuoso init file which we use when + # bootstrapping virtuoso. + with open(init_file, "w", encoding="utf-8") as file_: + file_.write(Template(VIRTUOSO_INI_FILE).substitute( + dir_path=tmpdirname)) + # Here we intentionally ignore the "+foreground" option to + # allow virtuoso to run in the background. + with subprocess.Popen( + [ + "virtuoso-t", + "+wait", + "+no-checkpoint", + "+configfile", + init_file, + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) as pid: + pid.wait() + # Define the query parameters and authentication + params = {"graph": "http://cd-test.genenetwork.org"} + auth = HTTPDigestAuth("dba", "dba") - # Define the query parameters and authentication - params = {"graph": "http://cd-test.genenetwork.org"} - auth = HTTPDigestAuth( - sparql_conf["sparql_user"], sparql_conf["sparql_password"]) - - # Make sure this graph does not exist before running anything - requests.delete(url, params=params, auth=auth) + # Make sure this graph does not exist before running anything + requests.delete( + SPARQL_CONF["sparql_crud_auth_uri"], params=params, auth=auth + ) - # Open the file in binary mode and send the request - with open(file_path, "rb") as file: - response = requests.put(url, params=params, auth=auth, data=file) - yield response - requests.delete(url, params=params, auth=auth) + # Open the file in binary mode and send the request + with open(file_path, "rb") as file: + response = requests.put( + SPARQL_CONF["sparql_crud_auth_uri"], + params=params, + auth=auth, + data=file, + ) + yield response + requests.delete( + SPARQL_CONF["sparql_crud_auth_uri"], params=params, auth=auth + ) + for pid_ in get_process_id(init_file): + psutil.Process(pid_).kill() |