diff options
| -rw-r--r-- | .guix-channel | 51 | ||||
| -rw-r--r-- | .guix/modules/gn-libs.scm | 2 | ||||
| -rw-r--r-- | gn_libs/http_logging.py | 56 | ||||
| -rw-r--r-- | gn_libs/jobs/jobs.py | 8 | ||||
| -rw-r--r-- | gn_libs/mysqldb.py | 14 | ||||
| -rw-r--r-- | gn_libs/privileges.py | 166 | ||||
| -rw-r--r-- | tests/unit/test_privileges_checking.py | 39 | ||||
| -rw-r--r-- | tests/unit/test_privileges_spec_parsing.py | 174 |
8 files changed, 496 insertions, 14 deletions
diff --git a/.guix-channel b/.guix-channel index 9476e74..2c37401 100644 --- a/.guix-channel +++ b/.guix-channel @@ -3,18 +3,55 @@ (directory ".guix/modules") (dependencies (channel + (name gn-machines) + (url "https://git.genenetwork.org/gn-machines") + (branch "main")) + ;; Until https://issues.guix.gnu.org/68797 is resolved, we need to + ;; explicitly list guix-bioinformatics, guix-forge, guix-past and + ;; guix-rust-past-crates—the dependencies of the gn-machines channel—here. + (channel + (name guix) + (url "https://codeberg.org/guix/guix") + (branch "master") + (commit "0a4740705090acc4c8a10d4f53afc58c9f62e980") + (introduction + (channel-introduction + (version 0) + (commit "9edb3f66fd807b096b48283debdcddccfea34bad") + (signer + "BBB0 2DDF 2CEA F6A8 0D1D E643 A2A0 6DF2 A33A 54FA")))) + (channel + (name guix-forge) + (url "https://git.systemreboot.net/guix-forge/") + (branch "main") + (commit "e43fd9a4d73654d3876e2c698af7da89f3408f89") + (introduction + (channel-introduction + (version 0) + (commit "0432e37b20dd678a02efee21adf0b9525a670310") + (signer + "7F73 0343 F2F0 9F3C 77BF 79D3 2E25 EE8B 6180 2BB3")))) + (channel (name guix-bioinformatics) (url "https://git.genenetwork.org/guix-bioinformatics") - (branch "master")) - ;; FIXME: guix-bioinformatics depends on guix-past. So, there - ;; should be no reason to explicitly depend on guix-past. But, the - ;; channel does not build otherwise. This is probably a guix bug. + (commit "903465c85c9b2ae28480b236c3364da873ca8f51")) (channel (name guix-past) - (url "https://gitlab.inria.fr/guix-hpc/guix-past") + (url "https://codeberg.org/guix-science/guix-past") + (branch "master") + (introduction + (channel-introduction + (version 0) + (commit "c3bc94ee752ec545e39c1b8a29f739405767b51c") + (signer + "3CE4 6455 8A84 FDC6 9DB4 0CFB 090B 1199 3D9A EBB5")))) + (channel + (name guix-rust-past-crates) + (url "https://codeberg.org/guix/guix-rust-past-crates.git") + (branch "trunk") (introduction (channel-introduction (version 0) - (commit "0c119db2ea86a389769f4d2b9c6f5c41c027e336") + (commit "1db24ca92c28255b28076792b93d533eabb3dc6a") (signer - "3CE4 6455 8A84 FDC6 9DB4 0CFB 090B 1199 3D9A EBB5")))))) + "F4C2 D1DF 3FDE EA63 D1D3 0776 ACC6 6D09 CA52 8292")))))) diff --git a/.guix/modules/gn-libs.scm b/.guix/modules/gn-libs.scm index 78b362b..8fa03db 100644 --- a/.guix/modules/gn-libs.scm +++ b/.guix/modules/gn-libs.scm @@ -1,5 +1,5 @@ (define-module (gn-libs) - #:use-module ((gn packages genenetwork) #:select (gn-libs) #:prefix gn:) + #:use-module ((gn-machines genenetwork) #:select (gn-libs) #:prefix gn:) #:use-module ((gnu packages check) #:select (python-pylint)) #:use-module ((gnu packages python-check) #:select (python-mypy)) #:use-module (guix gexp) diff --git a/gn_libs/http_logging.py b/gn_libs/http_logging.py new file mode 100644 index 0000000..79660a8 --- /dev/null +++ b/gn_libs/http_logging.py @@ -0,0 +1,56 @@ +"""Provide a way to emit logs to an HTTP endpoint""" +import logging +import json +import traceback +import urllib.request +from datetime import datetime + + +class SilentHTTPHandler(logging.Handler): + """A logging handler that emits logs to an HTTP endpoint silently. + + This handler converts log records to JSON and sends them via POST + to a specified HTTP endpoint. Failures are suppressed to avoid + interfering with the main application. + """ + def __init__(self, endpoint, timeout=0.1): + super().__init__() + self.endpoint = endpoint + self.timeout = timeout + + def emit(self, record): + try: + payload = { + "timestamp": datetime.utcfromtimestamp(record.created).isoformat(), + "level": record.levelname.lower(), + "logger": record.name, + "message": record.getMessage(), + } + for attr in ("remote_addr", "user_agent", "extra"): + if hasattr(record, attr): + payload.update({attr: getattr(record, attr)}) + + if record.exc_info: + payload["exception"] = "".join( + traceback.format_exception(*record.exc_info) + ) + + # fire-and-forget + self._send(payload) + + except Exception: + # absolute silence + pass + + def _send(self, payload): + try: + req = urllib.request.Request( + url=self.endpoint, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=5) as resp: + resp.read() # ignore body + except Exception: + pass diff --git a/gn_libs/jobs/jobs.py b/gn_libs/jobs/jobs.py index 8d77139..ec1c3a8 100644 --- a/gn_libs/jobs/jobs.py +++ b/gn_libs/jobs/jobs.py @@ -92,7 +92,7 @@ def initialise_job(# pylint: disable=[too-many-arguments, too-many-positional-ar command: list, job_type: str, extra_meta: Optional[dict] = None, - expiry_seconds: Optional[int] = _DEFAULT_EXPIRY_SECONDS_ + expiry_seconds: int = _DEFAULT_EXPIRY_SECONDS_ ) -> dict: """Initialise the job and put the details in a SQLite3 database.""" if extra_meta is None: @@ -115,7 +115,7 @@ def initialise_job(# pylint: disable=[too-many-arguments, too-many-positional-ar def output_file(jobid: uuid.UUID, outdir: Path, stream: str) -> Path: """Compute the path for the file where the launcher's `stream` output goes""" assert stream in ("stdout", "stderr"), f"Invalid stream '{stream}'" - return f"{outdir}/launcher_job_{jobid}.{stream}" + return outdir.joinpath(f"launcher_job_{jobid}.{stream}") stdout_filename = partial(output_file, stream="stdout") @@ -146,10 +146,10 @@ def launch_job( os.mkdir(error_dir) job_id = str(the_job["job_id"]) - with (open(stderr_filename(jobid=job_id, outdir=error_dir), + with (open(stderr_filename(jobid=the_job["job_id"], outdir=error_dir), "w", encoding="utf-8") as stderrfile, - open(stdout_filename(jobid=job_id, outdir=error_dir), + open(stdout_filename(jobid=the_job["job_id"], outdir=error_dir), "w", encoding="utf-8") as stdoutfile): subprocess.Popen( # pylint: disable=[consider-using-with] diff --git a/gn_libs/mysqldb.py b/gn_libs/mysqldb.py index fec3b30..3f6390e 100644 --- a/gn_libs/mysqldb.py +++ b/gn_libs/mysqldb.py @@ -9,7 +9,7 @@ import MySQLdb as mdb from MySQLdb.cursors import Cursor -_logger = logging.getLogger(__file__) +_logger = logging.getLogger(__name__) class InvalidOptionValue(Exception): """Raised whenever a parsed value is invalid for the specific option.""" @@ -46,6 +46,12 @@ def __parse_ssl_mode_options__(val: str) -> str: def __parse_ssl_options__(val: str) -> dict: + if val.strip() == "" or val.strip().lower() == "false": + return False + + if val.strip().lower() == "true": + return True + allowed_keys = ("key", "cert", "ca", "capath", "cipher") opts = { key.strip(): val.strip() for key,val in @@ -61,6 +67,7 @@ def __parse_db_opts__(opts: str) -> dict: This assumes use of python-mysqlclient library.""" allowed_opts = ( + # See: https://mysqlclient.readthedocs.io/user_guide.html#functions-and-attributes "unix_socket", "connect_timeout", "compress", "named_pipe", "init_command", "read_default_file", "read_default_group", "cursorclass", "use_unicode", "charset", "collation", "auth_plugin", @@ -124,7 +131,10 @@ class Connection(Protocol): @contextlib.contextmanager def database_connection(sql_uri: str, logger: logging.Logger = _logger) -> Iterator[Connection]: """Connect to MySQL database.""" - connection = mdb.connect(**parse_db_url(sql_uri)) + _conn_opts = parse_db_url(sql_uri) + _logger.debug("Connecting to database with the following options: %s", + _conn_opts) + connection = mdb.connect(**_conn_opts) try: yield connection connection.commit() diff --git a/gn_libs/privileges.py b/gn_libs/privileges.py new file mode 100644 index 0000000..32c943d --- /dev/null +++ b/gn_libs/privileges.py @@ -0,0 +1,166 @@ +"""Utilities for handling privileges.""" +import logging +from functools import reduce +from typing import Union, Sequence, Iterator, TypeAlias, TypedDict + +logger = logging.getLogger(__name__) + +Operator: TypeAlias = str # Valid operators: "AND", "OR" +Privilege: TypeAlias = str +PrivilegesList: TypeAlias = Sequence[Privilege] +ParseTree = tuple[Operator, + # Leaves (`PrivilegesList` objects) on the left, + # trees (`ParseTree` objects) on the right + Union[PrivilegesList, tuple[PrivilegesList, 'ParseTree']]] + + +class SpecificationValueError(ValueError): + """Raised when there is an error in the specification string.""" + + +_OPERATORS_ = ("OR", "AND") +_EMPTY_SPEC_ERROR_ = SpecificationValueError( + "Empty specification. I do not know what to do.") + + +def __add_leaves__( + index: int, + tree: tuple[Operator], + leaves: dict +) -> Union[tuple[Operator], Union[ParseTree, tuple]]: + """Add leaves to the tree.""" + if leaves.get(index): + return tree + (leaves[index],) + return tree + (tuple()) + + +class ParsingState(TypedDict): + """Class to create a state object. Mostly used to silence MyPy""" + tokens: list[str] + trees: list[tuple[int, int, str, int, int]]#(name, parent, operator, start, end) + open_parens: int + current_tree: int + leaves: dict[int, tuple[str, ...]]#[parent-tree, [index, index, ...]] + + +def __build_tree__(tree_state: ParsingState) -> ParseTree: + """Given computed state, build the actual tree.""" + _built = [] + for idx, tree in enumerate(tree_state["trees"]): + _built.append(__add_leaves__(idx, (tree[2],), tree_state["leaves"])) + + logger.debug("Number of built trees: %s, %s", len(_built), _built) + _num_trees = len(_built) + for idx in range(0, _num_trees): + _last_tree = _built.pop() + logger.debug("LAST TREE: %s, %s", _last_tree, len(_last_tree)) + if len(_last_tree) <= 1:# Has no leaves or subtrees + _last_tree = None# type: ignore[assignment] + continue# more evil + _name = tree_state["trees"][_num_trees - 1 - idx][0] + _parent = tree_state["trees"][ + tree_state["trees"][_num_trees - 1 - idx][1]] + _op = tree_state["trees"][_num_trees - 1 - idx][2] + logger.debug("TREE => name: %s, operation: %s, parent: %s", + _name, _op, _parent) + if _name != _parent[0]:# not root tree + if _op == _parent[2]: + _built[_parent[0]] = ( + _built[_parent[0]][0],# Operator + _built[_parent[0]][1] + _last_tree[1]# merge leaves + ) + _last_tree[2:]#Add any trees left over + else: + _built[_parent[0]] += (_last_tree,) + + if _last_tree is None: + raise _EMPTY_SPEC_ERROR_ + return _last_tree + + +def __parse_tree__(tokens: Iterator[str]) -> ParseTree: + """Parse the tokens into a tree.""" + _state = ParsingState( + tokens=[], trees=[], open_parens=0, current_tree=0, leaves={}) + for _idx, _token in enumerate(tokens): + _state["tokens"].append(_token) + + if _idx==0: + if _token[1:].upper() not in _OPERATORS_: + raise SpecificationValueError(f"Invalid operator: {_token[1:]}") + _state["open_parens"] += 1 + _state["trees"].append((0, 0, _token[1:].upper(), _idx, -1)) + _state["current_tree"] = 0 + continue# this is bad! + + if _token == ")":# end a tree + logger.debug("ENDING A TREE: %s", _state) + _state["open_parens"] -= 1 + _state["trees"][_state["current_tree"]] = ( + _state["trees"][_state["current_tree"]][0:-1] + (_idx,)) + # We go back to the parent below. + _state["current_tree"] = _state["trees"][_state["current_tree"]][1] + continue# still really bad! + + if _token[1:].upper() in _OPERATORS_:# new child tree + _state["open_parens"] += 1 + _state["trees"].append((len(_state["trees"]), + _state["current_tree"], + _token[1:].upper(), + _idx, + -1)) + _state["current_tree"] = len(_state["trees"]) - 1 + continue# more evil still + + logger.debug("state: %s", _state) + # leaves + _state["leaves"][_state["current_tree"]] = _state["leaves"].get( + _state["current_tree"], tuple()) + (_token,) + + # Build parse-tree from state + if _state["open_parens"] != 0: + raise SpecificationValueError("Unbalanced parentheses.") + return __build_tree__(_state) + + +def __tokenise__(spec: str) -> Iterator[str]: + """Clean up and tokenise the string.""" + return (token.strip() + for token in spec.replace( + "(", " (" + ).replace( + ")", " ) " + ).replace( + "( ", "(" + ).split()) + + +def parse(spec: str) -> ParseTree: + """Parse a string specification for privileges and return a tree of data + objects of the form (<operator> (<check>))""" + if spec.strip() == "": + raise _EMPTY_SPEC_ERROR_ + + return __parse_tree__(__tokenise__(spec)) + + +def __make_checker__(check_fn): + def __checker__(privileges, *checks): + def __check__(acc, curr): + if curr[0] in _OPERATORS_: + return acc + (_OPERATOR_FUNCTION_[curr[0]]( + privileges, *curr[1:]),) + return acc + (check_fn((priv in privileges) for priv in curr),) + results = reduce(__check__, checks, tuple()) + return len(results) > 0 and check_fn(results) + + return __checker__ + + +_OPERATOR_FUNCTION_ = { + "OR": __make_checker__(any), + "AND": __make_checker__(all) +} +def check(spec: str, privileges: tuple[str, ...]) -> bool: + """Check that the sequence of `privileges` satisfies `spec`.""" + _spec = parse(spec) + return _OPERATOR_FUNCTION_[_spec[0]](privileges, *_spec[1:]) diff --git a/tests/unit/test_privileges_checking.py b/tests/unit/test_privileges_checking.py new file mode 100644 index 0000000..05969b1 --- /dev/null +++ b/tests/unit/test_privileges_checking.py @@ -0,0 +1,39 @@ +"""Tests to verify the privileges check works.""" +import pytest + +from gn_libs.privileges import check + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,privileges,expected", + (("(AND priv1 priv2 priv3)", + ("priv2", "priv4", "priv1", "priv6", "priv3"), + True), + ("(AND priv1 priv2 priv3)", + ("priv1", "priv2", "priv4" "priv6"), + False), + ("(AND priv1 priv2 priv3)", + ("priv1", "priv4" "priv6"), + False), + ("(AND priv1 priv2 priv3)", + ("priv4", "priv5", "priv6"), + False), + ("(OR priv1 priv2 priv3)", + ("priv1", "priv2" "priv3"), + True), + ("(OR priv1 priv2 priv3)", + ("priv1", "priv2" "priv4", "priv5"), + True), + ("(OR priv1 priv2 priv3)", + ("priv1", "priv4", "priv5"), + True), + ("(OR priv1 priv2 priv3)", + ("priv4", "priv5", "priv6"), + False))) +def test_check(spec, privileges, expected): + """ + GIVEN: A privileges-check specification, and a tuple of privileges + WHEN: A check is performed + THEN: Verify that the check returns the expected value + """ + assert check(spec, privileges) == expected diff --git a/tests/unit/test_privileges_spec_parsing.py b/tests/unit/test_privileges_spec_parsing.py new file mode 100644 index 0000000..124f570 --- /dev/null +++ b/tests/unit/test_privileges_spec_parsing.py @@ -0,0 +1,174 @@ +"""Tests for parsing the privileges checks specification.""" +import pytest + +from gn_libs.privileges import parse, SpecificationValueError + + +## NOTE: Should we limit depth of nesting of checks, e.g. don't do more than +## 3 levels or so? + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec", + ("", + "(AND)", + "(AND (OR))", + "(OR (AND))", + "(OR (AND (OR (AND ))))")) +def test_empty_spec(spec): + """ + GIVEN: An effectively empty specification + WHEN: The specification is parsed + THEN: Raise a `SpecificationValueError` + """ + with pytest.raises(SpecificationValueError): + parse(spec) + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + (("(AND priv1)", ("AND", ("priv1",))), + ("(AND priv1 priv2)", ("AND", ("priv1", "priv2"))), + ("(AND priv1 priv2 priv3)", ("AND", ("priv1", "priv2", "priv3"))), + ("(and priv1)", ("AND", ("priv1",))), + ("(and priv1 priv2)", ("AND", ("priv1", "priv2"))), + ("(and priv1 priv2 priv3)", ("AND", ("priv1", "priv2", "priv3"))), + ("(and priv1 priv2 (and priv3 priv4))", + ("AND", ("priv1", "priv2", "priv3", "priv4"))))) +def test_and(spec, expected): + """ + GIVEN: A simple 'AND' privileges check specification `spec` + WHEN: The specification is parsed + THEN: Verify the parsed output gives an 'AND' check object + """ + assert parse(spec) == expected + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + (("(OR priv1)", ("OR", ("priv1",))), + ("(OR priv1 priv2)", ("OR", ("priv1", "priv2"))), + ("(OR priv1 priv2 priv3)", ("OR", ("priv1", "priv2", "priv3"))), + ("(or priv1)", ("OR", ("priv1",))), + ("(or priv1 priv2)", ("OR", ("priv1", "priv2"))), + ("(or priv1 priv2 priv3)", ("OR", ("priv1", "priv2", "priv3"))))) +def test_or(spec, expected): + """ + GIVEN: A simple 'OR' privileges check specification `spec` + WHEN: The specification is parsed + THEN: Verify the parsed output gives an 'OR' check object + """ + assert parse(spec) == expected + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + (("(or priv1 priv2 (or priv3 priv4))", + ("OR", ("priv1", "priv2", "priv3", "priv4"))), + ("(and priv1 priv2 (and priv3 priv4))", + ("AND", ("priv1", "priv2", "priv3", "priv4"))))) +def test_merging(spec, expected): + """ + GIVEN: + - A nested specification where 2 or more of subsequent operators are + - the same + WHEN: The specification is parsed + THEN: Verify the parsed output merges the checks into a single object + """ + # NOTE: The "given-when-then" description above does not please me. + assert parse(spec) == expected + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + (("(AND priv1 (or priv2 priv3))", + ("AND", ("priv1",), ("OR", ("priv2", "priv3")))),)) +def test_and_or(spec, expected): + """ + GIVEN: + - A specification beginning with an "AND" operator followed by an "OR" + - operator + WHEN: The specification is parsed + THEN: Verify the parsed output is correct + """ + assert parse(spec) == expected + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + (("(OR priv1 priv2 priv3 (and priv4 priv5))", + ("OR", ("priv1", "priv2", "priv3"), ("AND", ("priv4", "priv5")))),)) +def test_or_and(spec, expected): + """ + GIVEN: + - A specification beginning with an "OR" operator followed by an "AND" + - operator + WHEN: The specification is parsed + THEN: Verify the parsed output is correct + """ + assert parse(spec) == expected + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec", + ("()", + "this is invalid", + "priv1 AND priv2", + "(priv1 AND priv2)", + "(AND priv1 priv2 priv3")) +def test_invalid(spec): + """ + GIVEN: An invalid specification + WHEN: The specification is parsed + THEN: Verify that the `SpecificationValueError` is raised + """ + # NOTE: Maybe use hypothesis to generate random strings? + with pytest.raises(SpecificationValueError): + assert parse(spec) + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + (("(AND priv1 (or priv2 priv3) priv4 (and priv5 priv6))", + ("AND", + ("priv1", "priv4", "priv5", "priv6"), + ("OR", ("priv2", "priv3")))),)) +def test_complex(spec, expected): + """ + GIVEN: An valid, but more complex specification + WHEN: The specification is parsed + THEN: Verify that the specification parses correctly + """ + assert parse(spec) == expected + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "spec,expected", + ( + # -- We need to be careful about reduction -- + # -- Please revisit your boolean logic to verify -- + # -- how to reduce boolean statements. -- + # ("(AND priv1 (or priv2 priv3) priv4 (or priv5 priv6))", + # ("AND", + # ("priv1", "priv4"), + # ("OR", ("priv2", "priv3", "priv5", "priv6")))), + ("(OR priv1 (or priv2 priv3 (or priv4 priv5)) (or priv6 priv7))", + ("OR", ("priv1", "priv6", "priv7", "priv2", "priv3", "priv4", "priv5"))),)) +def test_reduction(spec, expected): + """ + GIVEN: A spec that can be reduced + WHEN: The specification is parsed + THEN: + - Verify that after parsing, it is reduced to the minimum number of levels + - possible. + """ + assert parse(spec) == expected |
