diff options
Diffstat (limited to 'gn3')
-rw-r--r-- | gn3/api/correlation.py | 2 | ||||
-rw-r--r-- | gn3/api/heatmaps.py | 2 | ||||
-rw-r--r-- | gn3/api/menu.py | 2 | ||||
-rw-r--r-- | gn3/api/metadata.py | 349 | ||||
-rw-r--r-- | gn3/api/metadata_api/wiki.py | 119 | ||||
-rw-r--r-- | gn3/api/rqtl.py | 4 | ||||
-rw-r--r-- | gn3/api/search.py | 25 | ||||
-rw-r--r-- | gn3/case_attributes.py | 96 | ||||
-rw-r--r-- | gn3/db/constants.py | 152 | ||||
-rw-r--r-- | gn3/db/rdf.py | 126 | ||||
-rw-r--r-- | gn3/db/wiki.py | 80 | ||||
-rw-r--r-- | gn3/db_utils.py | 7 | ||||
-rw-r--r-- | gn3/errors.py | 42 | ||||
-rw-r--r-- | gn3/oauth2/__init__.py | 1 | ||||
-rw-r--r-- | gn3/oauth2/authorisation.py | 34 | ||||
-rw-r--r-- | gn3/oauth2/errors.py | 8 | ||||
-rw-r--r-- | gn3/oauth2/jwks.py | 36 | ||||
-rw-r--r-- | gn3/settings.py | 4 |
18 files changed, 648 insertions, 441 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index eb4cc7d..c77dd93 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -64,7 +64,7 @@ def compute_lit_corr(species=None, gene_id=None): might be needed for actual computing of the correlation results """ - with database_connection(current_app.config["SQL_URI"]) as conn: + with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn: target_traits_gene_ids = request.get_json() target_trait_gene_list = list(target_traits_gene_ids.items()) diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py index 632c54a..172d555 100644 --- a/gn3/api/heatmaps.py +++ b/gn3/api/heatmaps.py @@ -24,7 +24,7 @@ def clustered_heatmaps(): return jsonify({ "message": "You need to provide at least two trait names." }), 400 - with database_connection(current_app.config["SQL_URI"]) as conn: + with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn: def parse_trait_fullname(trait): name_parts = trait.split(":") return f"{name_parts[1]}::{name_parts[0]}" diff --git a/gn3/api/menu.py b/gn3/api/menu.py index 58b761e..377ac6b 100644 --- a/gn3/api/menu.py +++ b/gn3/api/menu.py @@ -10,5 +10,5 @@ menu = Blueprint("menu", __name__) @menu.route("/generate/json") def generate_json(): """Get the menu in the JSON format""" - with database_connection(current_app.config["SQL_URI"]) as conn: + with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn: return jsonify(gen_dropdown_json(conn)) diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py index 91dc115..3f28f5d 100644 --- a/gn3/api/metadata.py +++ b/gn3/api/metadata.py @@ -5,7 +5,6 @@ from string import Template from pathlib import Path from authlib.jose import jwt - from flask import Blueprint from flask import request from flask import current_app @@ -14,135 +13,20 @@ from gn3.auth.authorisation.errors import AuthorisationError from gn3.db.datasets import (retrieve_metadata, 
save_metadata, get_history) -from gn3.db.rdf import RDF_PREFIXES from gn3.db.rdf import (query_frame_and_compact, - query_and_compact, - query_and_frame) - - -BASE_CONTEXT = { - "data": "@graph", - "id": "@id", - "type": "@type", - "gnc": "http://genenetwork.org/category/", - "gnt": "http://genenetwork.org/term/", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>", -} - -DATASET_CONTEXT = { - "accessRights": "dct:accessRights", - "accessionId": "dct:identifier", - "acknowledgement": "gnt:hasAcknowledgement", - "altLabel": "skos:altLabel", - "caseInfo": "gnt:hasCaseInfo", - "classifiedUnder": "xkos:classifiedUnder", - "contributors": "dct:creator", - "contactPoint": "dcat:contactPoint", - "created": "dct:created", - "dcat": "http://www.w3.org/ns/dcat#", - "dct": "http://purl.org/dc/terms/", - "description": "dct:description", - "ex": "http://example.org/stuff/1.0/", - "experimentDesignInfo": "gnt:hasExperimentDesignInfo", - "experimentType": "gnt:hasExperimentType", - "foaf": "http://xmlns.com/foaf/0.1/", - "geoSeriesId": "gnt:hasGeoSeriesId", - "gnt": "http://genenetwork.org/term/", - "inbredSet": "gnt:belongsToGroup", - "label": "rdfs:label", - "normalization": "gnt:usesNormalization", - "platformInfo": "gnt:hasPlatformInfo", - "notes": "gnt:hasNotes", - "organization": "foaf:Organization", - "prefLabel": "skos:prefLabel", - "citation": "dct:isReferencedBy", - "GoTree": "gnt:hasGOTreeValue", - "platform": "gnt:usesPlatform", - "processingInfo": "gnt:hasDataProcessingInfo", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "skos": "http://www.w3.org/2004/02/skos/core#", - "specifics": "gnt:hasContentInfo", - "title": "dct:title", - "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", - "tissueInfo": "gnt:hasTissueInfo", - "tissue": "gnt:hasTissue", - "contactWebUrl": "foaf:homepage", - "contactName": "foaf:name", -} - -SEARCH_CONTEXT = { - "pages": "ex:pages", - "hits": "ex:hits", - "result": "ex:result", 
- "results": "ex:items", - "resultItem": "ex:resultType", - "currentPage": "ex:currentPage", -} - -DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | { - "classifiedUnder": "xkos:classifiedUnder", - "created": "dct:created", - "dct": "http://purl.org/dc/terms/", - "ex": "http://example.org/stuff/1.0/", - "inbredSet": "ex:belongsToInbredSet", - "title": "dct:title", - "name": "rdfs:label", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "type": "@type", - "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", -} + query_and_compact) +from gn3.db.constants import ( + RDF_PREFIXES, BASE_CONTEXT, + DATASET_CONTEXT, + DATASET_SEARCH_CONTEXT, PUBLICATION_CONTEXT, + PHENOTYPE_CONTEXT +) -PUBLICATION_CONTEXT = { - "dct": "http://purl.org/dc/terms/", - "fabio": "http://purl.org/spar/fabio/", - "prism": "http://prismstandard.org/namespaces/basic/2.0/", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "title": "dct:title", - "journal": "fabio:Journal", - "volume": "prism:volume", - "page": "fabio:page", - "creator": "dct:creator", - "abstract": "dct:abstract", - "year": { - "@id": "fabio:hasPublicationYear", - "@type": "xsd:gYear", - }, - "month": { - "@id": "prism:publicationDate", - "@type": "xsd:gMonth" - }, -} +from gn3.api.metadata_api import wiki -PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | { - "skos": "http://www.w3.org/2004/02/skos/core#", - "dcat": "http://www.w3.org/ns/dcat#", - "prism": "http://prismstandard.org/namespaces/basic/2.0/", - "traitName": "skos:altLabel", - "trait": "rdfs:label", - "altName": "rdfs:altLabel", - "description": "dct:description", - "abbreviation": "gnt:abbreviation", - "labCode": "gnt:labCode", - "submitter": "gnt:submitter", - "dataset": "dcat:Distribution", - "contributor": "dct:contributor", - "mean": "gnt:mean", - "locus": "gnt:locus", - "lodScore": "gnt:lodScore", - "references": "dct:isReferencedBy", - "additive": "gnt:additive", - "sequence": "gnt:sequence", - "prefLabel": "skos:prefLabel", - "identifier": "dct:identifier", 
- "chromosome": "gnt:chr", - "mb": "gnt:mb", - "peakLocation": "gnt:locus", - "species": "gnt:belongsToSpecies", - "group": "gnt:belongsToGroup", -} metadata = Blueprint("metadata", __name__) +metadata.register_blueprint(wiki.wiki_blueprint) @metadata.route("/datasets/<name>", methods=["GET"]) @@ -208,7 +92,7 @@ CONSTRUCT { (Path( current_app.config.get("DATA_DIR") ) / "gn-docs/general/datasets" / - Path(__result.get("id", "")).stem).as_posix() + Path(__result.get("id", "")).stem).as_posix() ) @@ -348,69 +232,6 @@ def edit_dataset(): lambda x: ("Edit successfull", 201) ) -@metadata.route("/datasets/search/<term>", methods=["GET"]) -def search_datasets(term): - """Search datasets""" - args = request.args - page = args.get("page", 0) - page_size = args.get("per-page", 10) - _query = Template(""" -$prefix - -CONSTRUCT { - ex:result rdf:type ex:resultType ; - ex:pages ?pages ; - ex:hits ?hits ; - ex:currentPage $offset ; - ex:items [ - rdfs:label ?label ; - dct:title ?title ; - ex:belongsToInbredSet ?inbredSetName ; - xkos:classifiedUnder ?datasetType ; - ] -} WHERE { -{ - SELECT DISTINCT ?dataset ?label ?inbredSetName ?datasetType ?title - WHERE { - ?dataset rdf:type dcat:Dataset ; - rdfs:label ?label ; - ?datasetPredicate ?datasetObject ; - xkos:classifiedUnder ?inbredSet . - ?inbredSet ^skos:member gnc:Set ; - rdfs:label ?inbredSetName . - ?datasetObject bif:contains "'$term'" . - OPTIONAL { - ?dataset dct:title ?title . - } . - OPTIONAL { - ?classification ^xkos:classifiedUnder ?dataset ; - ^skos:member gnc:DatasetType ; - ?typePredicate ?typeName ; - skos:prefLabel ?datasetType . - } - } ORDER BY ?dataset LIMIT $limit OFFSET $offset -} - -{ - SELECT (COUNT(DISTINCT ?dataset)/$limit+1 AS ?pages) - (COUNT(DISTINCT ?dataset) AS ?hits) WHERE { - ?dataset rdf:type dcat:Dataset ; - ?p ?o . - ?o bif:contains "'$term'" . 
- } -} - -} -""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page) - _context = { - "@context": BASE_CONTEXT | DATASET_SEARCH_CONTEXT, - "type": "resultItem", - } - return query_frame_and_compact( - _query, _context, - current_app.config.get("SPARQL_ENDPOINT") - ) - @metadata.route("/publications/<name>", methods=["GET"]) def publications(name): @@ -436,65 +257,6 @@ CONSTRUCT { ) -@metadata.route("/publications/search/<term>", methods=["GET"]) -def search_publications(term): - """Search publications""" - args = request.args - page = args.get("page", 0) - page_size = args.get("per-page", 10) - _query = Template(""" -$prefix - -CONSTRUCT { - ex:result rdf:type ex:resultType ; - ex:totalCount ?totalCount ; - ex:currentPage $offset ; - ex:items [ - rdfs:label ?publication ; - dct:title ?title ; - ] -} WHERE { -{ - SELECT ?publication ?title ?pmid WHERE { - ?pub rdf:type fabio:ResearchPaper ; - ?predicate ?object ; - dct:title ?title . - ?object bif:contains "'$term'" . - BIND( STR(?pub) AS ?publication ) . - } ORDER BY ?title LIMIT $limit OFFSET $offset - } -{ - SELECT (COUNT(*)/$limit+1 AS ?totalCount) WHERE { - ?publication rdf:type fabio:ResearchPaper ; - ?predicate ?object . - ?object bif:contains "'$term'" . 
- } -} -} -""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page) - _context = { - "@context": BASE_CONTEXT | SEARCH_CONTEXT | { - "dct": "http://purl.org/dc/terms/", - "ex": "http://example.org/stuff/1.0/", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "fabio": "http://purl.org/spar/fabio/", - "title": "dct:title", - "pubmed": "fabio:hasPubMedId", - "currentPage": "ex:currentPage", - "url": "rdfs:label", - }, - "type": "resultItem", - "paper": { - "@type": "fabio:ResearchPaper", - "@container": "@index" - } - } - return query_and_frame( - _query, _context, - current_app.config.get("SPARQL_ENDPOINT") - ) - - @metadata.route("/phenotypes/<name>", methods=["GET"]) @metadata.route("/phenotypes/<group>/<name>", methods=["GET"]) def phenotypes(name, group=None): @@ -630,97 +392,6 @@ CONSTRUCT { ) -@metadata.route("/genewikis/gn/<symbol>", methods=["GET"]) -def get_gn_genewiki_entries(symbol): - """Fetch the GN and NCBI GeneRIF entries""" - args = request.args - page = args.get("page", 0) - page_size = args.get("per-page", 10) - _query = Template(""" -$prefix - -CONSTRUCT { - ?symbol ex:entries [ - rdfs:comment ?comment ; - ex:species ?species_ ; - dct:created ?createTime ; - dct:references ?pmids ; - dct:creator ?creator ; - gnt:belongsToCategory ?categories ; - ] . - ?symbol rdf:type gnc:GNWikiEntry ; - ex:totalCount ?totalCount ; - ex:currentPage $offset . -} WHERE { -{ - SELECT ?symbol ?comment - (GROUP_CONCAT(DISTINCT ?speciesName; SEPARATOR='; ') AS ?species_) - ?createTime ?creator - (GROUP_CONCAT(DISTINCT ?pubmed; SEPARATOR='; ') AS ?pmids) - (GROUP_CONCAT(DISTINCT ?category; SEPARATOR='; ') AS ?categories) - WHERE { - ?symbol rdfs:label ?label ; - rdfs:comment _:entry . - ?label bif:contains "'$symbol'" . - _:entry rdf:type gnc:GNWikiEntry ; - rdfs:comment ?comment . - OPTIONAL { - ?species ^xkos:classifiedUnder _:entry ; - ^skos:member gnc:Species ; - skos:prefLabel ?speciesName . - } . 
- OPTIONAL { _:entry dct:created ?createTime . } . - OPTIONAL { _:entry dct:references ?pubmed . } . - OPTIONAL { - ?investigator foaf:name ?creator ; - ^dct:creator _:entry . - } . - OPTIONAL { _:entry gnt:belongsToCategory ?category . } . - } GROUP BY ?comment ?symbol ?createTime - ?creator ORDER BY ?createTime LIMIT $limit OFFSET $offset -} - -{ - SELECT (COUNT(DISTINCT ?comment)/$limit+1 AS ?totalCount) WHERE { - ?symbol rdfs:comment _:entry ; - rdfs:label ?label . - _:entry rdfs:comment ?comment ; - rdf:type gnc:GNWikiEntry . - ?label bif:contains "'$symbol'" . - } -} -} -""").substitute(prefix=RDF_PREFIXES, symbol=symbol, - limit=page_size, offset=page) - _context = { - "@context": BASE_CONTEXT | { - "ex": "http://example.org/stuff/1.0/", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "gnt": "http://genenetwork.org/term/", - "gnc": "http://genenetwork.org/category/", - "dct": "http://purl.org/dc/terms/", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "entries": "ex:entries", - "comment": "rdfs:comment", - "species": "ex:species", - "category": 'gnt:belongsToCategory', - "author": "dct:creator", - "pubmed": "dct:references", - "currentPage": "ex:currentPage", - "pages": "ex:totalCount", - "created": { - "@id": "dct:created", - "@type": "xsd:datetime" - }, - }, - "type": "gnc:GNWikiEntry" - } - return query_frame_and_compact( - _query, _context, - current_app.config.get("SPARQL_ENDPOINT") - ) - - @metadata.route("/genewikis/ncbi/<symbol>", methods=["GET"]) def get_ncbi_genewiki_entries(symbol): """Fetch the NCBI GeneRIF entries""" diff --git a/gn3/api/metadata_api/wiki.py b/gn3/api/metadata_api/wiki.py new file mode 100644 index 0000000..a4abef6 --- /dev/null +++ b/gn3/api/metadata_api/wiki.py @@ -0,0 +1,119 @@ +"""API for accessing/editting wiki metadata""" + +import datetime +from typing import Any, Dict +from flask import Blueprint, request, jsonify, current_app, make_response +from gn3 import db_utils +from gn3.db import wiki +from gn3.db.rdf import 
(query_frame_and_compact, + get_wiki_entries_by_symbol) + + +wiki_blueprint = Blueprint("wiki", __name__, url_prefix="wiki") + + +@wiki_blueprint.route("/<int:comment_id>/edit", methods=["POST"]) +def edit_wiki(comment_id: int): + """Edit wiki comment. This is achieved by adding another entry with a new VersionId""" + # FIXME: attempt to check and fix for types here with relevant errors + payload: Dict[str, Any] = request.json # type: ignore + pubmed_ids = [str(x) for x in payload.get("pubmed_ids", [])] + + insert_dict = { + "Id": comment_id, + "symbol": payload["symbol"], + "PubMed_ID": " ".join(pubmed_ids), + "comment": payload["comment"], + "email": payload["email"], + "createtime": datetime.datetime.now(datetime.timezone.utc).strftime( + "%Y-%m-%d %H:%M" + ), + "user_ip": request.environ.get("HTTP_X_REAL_IP", request.remote_addr), + "weburl": payload.get("web_url"), + "initial": payload.get("initial"), + "reason": payload["reason"], + } + + insert_query = """ + INSERT INTO GeneRIF (Id, versionId, symbol, PubMed_ID, SpeciesID, comment, + email, createtime, user_ip, weburl, initial, reason) + VALUES (%(Id)s, %(versionId)s, %(symbol)s, %(PubMed_ID)s, %(SpeciesID)s, %(comment)s, %(email)s, %(createtime)s, %(user_ip)s, %(weburl)s, %(initial)s, %(reason)s) + """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + cursor = conn.cursor() + try: + category_ids = wiki.get_categories_ids( + cursor, payload["categories"]) + species_id = wiki.get_species_id(cursor, payload["species"]) + next_version = wiki.get_next_comment_version(cursor, comment_id) + except wiki.MissingDBDataException as missing_exc: + return jsonify(error=f"Error editting wiki entry, {missing_exc}"), 500 + insert_dict["SpeciesID"] = species_id + insert_dict["versionId"] = next_version + current_app.logger.debug(f"Running query: {insert_query}") + cursor.execute(insert_query, insert_dict) + category_addition_query = """ + INSERT INTO GeneRIFXRef (GeneRIFId, versionId, 
GeneCategoryId) + VALUES (%s, %s, %s) + """ + + for cat_id in category_ids: + current_app.logger.debug( + f"Running query: {category_addition_query}") + cursor.execute( + category_addition_query, (comment_id, + insert_dict["versionId"], cat_id) + ) + return jsonify({"success": "ok"}) + return jsonify(error="Error editing wiki entry, most likely due to DB error!"), 500 + + +@wiki_blueprint.route("/<string:symbol>", methods=["GET"]) +def get_wiki_entries(symbol: str): + """Fetch wiki entries""" + content_type = request.headers.get("Content-Type") + status_code = 200 + response = get_wiki_entries_by_symbol( + symbol=symbol, + sparql_uri=current_app.config["SPARQL_ENDPOINT"]) + data = response.get("data") + if not data: + data = {} + status_code = 404 + if content_type == "application/ld+json": + payload = make_response(response) + payload.headers["Content-Type"] = "application/ld+json" + return payload, status_code + return jsonify(data), status_code + + +@wiki_blueprint.route("/<int:comment_id>", methods=["GET"]) +def get_wiki(comment_id: int): + """ + Gets latest wiki comments. 
+ + TODO: fetch this from RIF + """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + return jsonify(wiki.get_latest_comment(conn, comment_id)) + return jsonify(error="Error fetching wiki entry, most likely due to DB error!"), 500 + + +@wiki_blueprint.route("/categories", methods=["GET"]) +def get_categories(): + """ Gets list of supported categories for RIF """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + cursor = conn.cursor() + categories_dict = wiki.get_categories(cursor) + return jsonify(categories_dict) + return jsonify(error="Error getting categories, most likely due to DB error!"), 500 + + +@wiki_blueprint.route("/species", methods=["GET"]) +def get_species(): + """ Gets list of all species, contains name and SpeciesName """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + cursor = conn.cursor() + species_dict = wiki.get_species(cursor) + return jsonify(species_dict) + return jsonify(error="Error getting species, most likely due to DB error!"), 500 diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 70ebe12..ae0110d 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,11 +25,11 @@ run the rqtl_wrapper script and return the results as JSON raise FileNotFoundError # Split kwargs by those with values and boolean ones that just convert to True/False - kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"] + kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control"] boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs - rqtl_kwargs = {"geno": genofile, "pheno": phenofile} + rqtl_kwargs = {"geno": genofile, "pheno": phenofile, "outdir": current_app.config.get("TMPDIR")} rqtl_bool_kwargs = [] for kwarg in all_kwargs: if kwarg in request.form: diff --git a/gn3/api/search.py b/gn3/api/search.py index c741b15..f696428 100644 --- a/gn3/api/search.py +++ b/gn3/api/search.py @@ -194,23 
+194,36 @@ def parse_location_field(species_query: xapian.Query, .maybe(xapian.Query.MatchNothing, make_query)) +def parse_boolean_prefixed_field(prefix: str, query: bytes) -> xapian.Query: + """Parse boolean prefixed field and return a xapian query.""" + # For some reason, xapian does not stem boolean prefixed fields + # when the query starts with a capital letter. We need it to stem + # always. Hence this function. + return xapian.Query(prefix + query.decode("utf-8").lower()) + + # pylint: disable=too-many-locals def parse_query(synteny_files_directory: Path, query: str): """Parse search query using GeneNetwork specific field processors.""" queryparser = xapian.QueryParser() queryparser.set_stemmer(xapian.Stem("en")) - queryparser.set_stemming_strategy(queryparser.STEM_SOME) + queryparser.set_stemming_strategy(queryparser.STEM_ALL_Z) species_prefix = "XS" chromosome_prefix = "XC" queryparser.add_boolean_prefix("author", "A") queryparser.add_boolean_prefix("species", species_prefix) - queryparser.add_boolean_prefix("group", "XG") + queryparser.add_boolean_prefix("group", + FieldProcessor(partial(parse_boolean_prefixed_field, "XG"))) queryparser.add_boolean_prefix("tissue", "XI") queryparser.add_boolean_prefix("dataset", "XDS") queryparser.add_boolean_prefix("symbol", "XY") queryparser.add_boolean_prefix("chr", chromosome_prefix) queryparser.add_boolean_prefix("peakchr", "XPC") queryparser.add_prefix("description", "XD") + queryparser.add_prefix("rif", "XRF") + queryparser.add_prefix("wiki", "XWK") + queryparser.add_prefix("RIF", "XRF") + queryparser.add_prefix("WIKI", "XWK") range_prefixes = ["mean", "peak", "position", "peakmb", "additive", "year"] for i, prefix in enumerate(range_prefixes): # Treat position specially since it needs its own field processor. 
@@ -263,11 +276,13 @@ def search_results(): if page < 1: abort(404, description="Requested page does not exist") results_per_page = args.get("per_page", default=100, type=int) - maximum_results_per_page = 10000 + maximum_results_per_page = 50000 if results_per_page > maximum_results_per_page: abort(400, description="Requested too many search results") - - query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring) + try: + query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring) + except xapian.QueryParserError as err: + return jsonify({"error_type": str(err.get_type()), "error": err.get_msg()}), 400 traits = [] # pylint: disable=invalid-name with xapian_database(current_app.config["XAPIAN_DB_PATH"]) as db: diff --git a/gn3/case_attributes.py b/gn3/case_attributes.py index d973b8e..efc82e9 100644 --- a/gn3/case_attributes.py +++ b/gn3/case_attributes.py @@ -26,8 +26,8 @@ from gn3.commands import run_cmd from gn3.db_utils import Connection, database_connection +from gn3.oauth2.authorisation import require_token from gn3.auth.authorisation.errors import AuthorisationError -from gn3.auth.authorisation.oauth2.resource_server import require_oauth caseattr = Blueprint("case-attribute", __name__) @@ -61,8 +61,10 @@ class CAJSONEncoder(json.JSONEncoder): return json.JSONEncoder.default(self, obj) def required_access( - inbredset_id: int, access_levels: tuple[str, ...]) -> Union[ - bool, tuple[str, ...]]: + token: dict, + inbredset_id: int, + access_levels: tuple[str, ...] 
+) -> Union[bool, tuple[str, ...]]: """Check whether the user has the appropriate access""" def __species_id__(conn): with conn.cursor() as cursor: @@ -71,19 +73,21 @@ def required_access( (inbredset_id,)) return cursor.fetchone()[0] try: - with (require_oauth.acquire("profile resource") as the_token, - database_connection(current_app.config["SQL_URI"]) as conn): + with database_connection(current_app.config["SQL_URI"]) as conn: result = requests.get( + # this section fetches the resource ID from the auth server urljoin(current_app.config["AUTH_SERVER_URL"], "auth/resource/inbredset/resource-id" f"/{__species_id__(conn)}/{inbredset_id}")) if result.status_code == 200: resource_id = result.json()["resource-id"] auth = requests.post( + # this section fetches the authorisations/privileges that + # the current user has on the resource we got above urljoin(current_app.config["AUTH_SERVER_URL"], "auth/resource/authorisation"), json={"resource-ids": [resource_id]}, - headers={"Authorization": f"Bearer {the_token.access_token}"}) + headers={"Authorization": f"Bearer {token['access_token']}"}) if auth.status_code == 200: privs = tuple(priv["privilege_id"] for role in auth.json()[resource_id]["roles"] @@ -398,14 +402,15 @@ def __apply_deletions__( params) def __apply_diff__( - conn: Connection, inbredset_id: int, diff_filename, the_diff) -> None: + conn: Connection, auth_token, inbredset_id: int, diff_filename, the_diff) -> None: """ Apply the changes in the diff at `diff_filename` to the data in the database if the user has appropriate privileges. 
""" - required_access( - inbredset_id, ("system:inbredset:edit-case-attribute", - "system:inbredset:apply-case-attribute-edit")) + required_access(auth_token, + inbredset_id, + ("system:inbredset:edit-case-attribute", + "system:inbredset:apply-case-attribute-edit")) diffs = the_diff["diff"] with conn.cursor(cursorclass=DictCursor) as cursor: # __apply_additions__(cursor, inbredset_id, diffs["Additions"]) @@ -419,6 +424,7 @@ def __apply_diff__( os.rename(diff_filename, new_path) def __reject_diff__(conn: Connection, + auth_token: dict, inbredset_id: int, diff_filename: Path, diff: dict) -> Path: @@ -426,38 +432,45 @@ def __reject_diff__(conn: Connection, Reject the changes in the diff at `diff_filename` to the data in the database if the user has appropriate privileges. """ - required_access( - inbredset_id, ("system:inbredset:edit-case-attribute", - "system:inbredset:apply-case-attribute-edit")) + required_access(auth_token, + inbredset_id, + ("system:inbredset:edit-case-attribute", + "system:inbredset:apply-case-attribute-edit")) __save_diff__(conn, diff, EditStatus.rejected) new_path = Path(diff_filename.parent, f"{diff_filename.stem}-rejected{diff_filename.suffix}") os.rename(diff_filename, new_path) return diff_filename @caseattr.route("/<int:inbredset_id>/add", methods=["POST"]) -def add_case_attributes(inbredset_id: int) -> Response: +@require_token +def add_case_attributes(inbredset_id: int, auth_token=None) -> Response: """Add a new case attribute for `InbredSetId`.""" - required_access(inbredset_id, ("system:inbredset:create-case-attribute",)) - with (require_oauth.acquire("profile resource") as the_token, # pylint: disable=[unused-variable] - database_connection(current_app.config["SQL_URI"]) as conn): # pylint: disable=[unused-variable] + required_access( + auth_token, inbredset_id, ("system:inbredset:create-case-attribute",)) + with database_connection(current_app.config["SQL_URI"]) as conn: # pylint: disable=[unused-variable] raise NotImplementedError 
@caseattr.route("/<int:inbredset_id>/delete", methods=["POST"]) -def delete_case_attributes(inbredset_id: int) -> Response: +@require_token +def delete_case_attributes(inbredset_id: int, auth_token=None) -> Response: """Delete a case attribute from `InbredSetId`.""" - required_access(inbredset_id, ("system:inbredset:delete-case-attribute",)) - with (require_oauth.acquire("profile resource") as the_token, # pylint: disable=[unused-variable] - database_connection(current_app.config["SQL_URI"]) as conn): # pylint: disable=[unused-variable] + required_access( + auth_token, inbredset_id, ("system:inbredset:delete-case-attribute",)) + with database_connection(current_app.config["SQL_URI"]) as conn: # pylint: disable=[unused-variable] raise NotImplementedError @caseattr.route("/<int:inbredset_id>/edit", methods=["POST"]) -def edit_case_attributes(inbredset_id: int) -> Response: - """Edit the case attributes for `InbredSetId` based on data received.""" - with (require_oauth.acquire("profile resource") as the_token, - database_connection(current_app.config["SQL_URI"]) as conn): - required_access(inbredset_id, +@require_token +def edit_case_attributes(inbredset_id: int, auth_token = None) -> Response: + """Edit the case attributes for `InbredSetId` based on data received. 
@caseattr.route("/approve/<path:filename>", methods=["POST"])
@require_token
def approve_case_attributes_diff(filename: str, auth_token=None) -> Response:
    """Approve the changes to the case attributes in the diff."""
    diff_filename = Path(current_app.config["TMPDIR"], CATTR_DIFFS_DIR,
                         filename)
    the_diff = __load_diff__(diff_filename)
    with database_connection(current_app.config["SQL_URI"]) as conn:
        # __apply_diff__ performs its own privilege check via the token.
        __apply_diff__(conn,
                       auth_token,
                       the_diff["inbredset_id"],
                       diff_filename,
                       the_diff)
        return jsonify({
            "message": "Applied the diff successfully.",
            "diff_filename": diff_filename.name
        })


@caseattr.route("/reject/<path:filename>", methods=["POST"])
@require_token
def reject_case_attributes_diff(filename: str, auth_token=None) -> Response:
    """Reject the changes to the case attributes in the diff."""
    diff_filename = Path(current_app.config["TMPDIR"], CATTR_DIFFS_DIR,
                         filename)
    the_diff = __load_diff__(diff_filename)
    with database_connection(current_app.config["SQL_URI"]) as conn:
        # __reject_diff__ performs its own privilege check via the token.
        __reject_diff__(conn,
                        auth_token,
                        the_diff["inbredset_id"],
                        diff_filename,
                        the_diff)
        return jsonify({
            "message": "Rejected diff successfully",
            "diff_filename": diff_filename.name
        })
+""" +PREFIXES = { + "dcat": "http://www.w3.org/ns/dcat#", + "dct": "http://purl.org/dc/terms/", + "ex": "http://example.org/stuff/1.0/", + "fabio": "http://purl.org/spar/fabio/", + "foaf": "http://xmlns.com/foaf/0.1/", + "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=", + "genotype": "http://genenetwork.org/genotype/", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "owl": "http://www.w3.org/2002/07/owl#", + "phenotype": "http://genenetwork.org/phenotype/", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "publication": "http://genenetwork.org/publication/", + "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=", + "up": "http://purl.uniprot.org/core/", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "xsd": "http://www.w3.org/2001/XMLSchema#", +} + +RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>" + for key, value in PREFIXES.items()]) + +BASE_CONTEXT = { + "data": "@graph", + "type": "@type", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>", +} + +DATASET_CONTEXT = { + "accessRights": "dct:accessRights", + "accessionId": "dct:identifier", + "acknowledgement": "gnt:hasAcknowledgement", + "altLabel": "skos:altLabel", + "caseInfo": "gnt:hasCaseInfo", + "classifiedUnder": "xkos:classifiedUnder", + "contributors": "dct:creator", + "contactPoint": "dcat:contactPoint", + "created": "dct:created", + "dcat": "http://www.w3.org/ns/dcat#", + "dct": "http://purl.org/dc/terms/", + "description": "dct:description", + "ex": 
"http://example.org/stuff/1.0/", + "experimentDesignInfo": "gnt:hasExperimentDesignInfo", + "experimentType": "gnt:hasExperimentType", + "foaf": "http://xmlns.com/foaf/0.1/", + "geoSeriesId": "gnt:hasGeoSeriesId", + "gnt": "http://genenetwork.org/term/", + "inbredSet": "gnt:belongsToGroup", + "label": "rdfs:label", + "normalization": "gnt:usesNormalization", + "platformInfo": "gnt:hasPlatformInfo", + "notes": "gnt:hasNotes", + "organization": "foaf:Organization", + "prefLabel": "skos:prefLabel", + "citation": "dct:isReferencedBy", + "GoTree": "gnt:hasGOTreeValue", + "platform": "gnt:usesPlatform", + "processingInfo": "gnt:hasDataProcessingInfo", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "specifics": "gnt:hasContentInfo", + "title": "dct:title", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "tissueInfo": "gnt:hasTissueInfo", + "tissue": "gnt:hasTissue", + "contactWebUrl": "foaf:homepage", + "contactName": "foaf:name", +} + +SEARCH_CONTEXT = { + "pages": "ex:pages", + "hits": "ex:hits", + "result": "ex:result", + "results": "ex:items", + "resultItem": "ex:resultType", + "currentPage": "ex:currentPage", +} + +DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | { + "classifiedUnder": "xkos:classifiedUnder", + "created": "dct:created", + "dct": "http://purl.org/dc/terms/", + "ex": "http://example.org/stuff/1.0/", + "inbredSet": "ex:belongsToInbredSet", + "title": "dct:title", + "name": "rdfs:label", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "type": "@type", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", +} + +PUBLICATION_CONTEXT = { + "dct": "http://purl.org/dc/terms/", + "fabio": "http://purl.org/spar/fabio/", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "title": "dct:title", + "journal": "fabio:Journal", + "volume": "prism:volume", + "page": "fabio:page", + "creator": "dct:creator", + "abstract": "dct:abstract", + "year": 
{ + "@id": "fabio:hasPublicationYear", + "@type": "xsd:gYear", + }, + "month": { + "@id": "prism:publicationDate", + "@type": "xsd:gMonth" + }, +} + +PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | { + "skos": "http://www.w3.org/2004/02/skos/core#", + "dcat": "http://www.w3.org/ns/dcat#", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "traitName": "skos:altLabel", + "trait": "rdfs:label", + "altName": "rdfs:altLabel", + "description": "dct:description", + "abbreviation": "gnt:abbreviation", + "labCode": "gnt:labCode", + "submitter": "gnt:submitter", + "dataset": "dcat:Distribution", + "contributor": "dct:contributor", + "mean": "gnt:mean", + "locus": "gnt:locus", + "lodScore": "gnt:lodScore", + "references": "dct:isReferencedBy", + "additive": "gnt:additive", + "sequence": "gnt:sequence", + "prefLabel": "skos:prefLabel", + "identifier": "dct:identifier", + "chromosome": "gnt:chr", + "mb": "gnt:mb", + "peakLocation": "gnt:locus", + "species": "gnt:belongsToSpecies", + "group": "gnt:belongsToGroup", +} diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py index eb4014a..5a95683 100644 --- a/gn3/db/rdf.py +++ b/gn3/db/rdf.py @@ -4,39 +4,12 @@ This module is a collection of functions that handle SPARQL queries. 
""" import json - +from string import Template from SPARQLWrapper import SPARQLWrapper from pyld import jsonld # type: ignore - - -PREFIXES = { - "dcat": "http://www.w3.org/ns/dcat#", - "dct": "http://purl.org/dc/terms/", - "ex": "http://example.org/stuff/1.0/", - "fabio": "http://purl.org/spar/fabio/", - "foaf": "http://xmlns.com/foaf/0.1/", - "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=", - "genotype": "http://genenetwork.org/genotype/", - "gn": "http://genenetwork.org/id/", - "gnc": "http://genenetwork.org/category/", - "gnt": "http://genenetwork.org/term/", - "owl": "http://www.w3.org/2002/07/owl#", - "phenotype": "http://genenetwork.org/phenotype/", - "prism": "http://prismstandard.org/namespaces/basic/2.0/", - "publication": "http://genenetwork.org/publication/", - "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "skos": "http://www.w3.org/2004/02/skos/core#", - "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=", - "up": "http://purl.uniprot.org/core/", - "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", - "xsd": "http://www.w3.org/2001/XMLSchema#", -} - - -RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>" - for key, value in PREFIXES.items()]) +from gn3.db.constants import ( + RDF_PREFIXES, BASE_CONTEXT +) def sparql_construct_query(query: str, endpoint: str) -> dict: @@ -51,22 +24,101 @@ def sparql_construct_query(query: str, endpoint: str) -> dict: def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict: """Frame and then compact the results given a context""" results = sparql_construct_query(query, endpoint) - if not results: - return {} return jsonld.compact(jsonld.frame(results, context), context) def query_and_compact(query: str, context: dict, endpoint: str) -> dict: """Compact the results given a context""" results = sparql_construct_query(query, 
endpoint) - if not results: - return {} return jsonld.compact(results, context) def query_and_frame(query: str, context: dict, endpoint: str) -> dict: """Frame the results given a context""" results = sparql_construct_query(query, endpoint) - if not results: - return {} return jsonld.frame(results, context) + + +def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict: + """Fetch all the Wiki entries using the symbol""" + # This query uses a sub-query to fetch the latest comment by the + # version id. + query = Template(""" +$prefix + +CONSTRUCT { + ?uid rdfs:label ?symbolName; + gnt:reason ?reason ; + gnt:species ?species ; + dct:references ?pmid ; + foaf:homepage ?weburl ; + rdfs:comment ?comment ; + foaf:mbox ?email ; + gnt:initial ?usercode ; + gnt:belongsToCategory ?category ; + gnt:hasVersion ?versionId ; + dct:created ?created ; + dct:identifier ?identifier . +} WHERE { + ?symbolId rdfs:label ?symbolName . + ?uid rdfs:comment ?comment ; + gnt:symbol ?symbolId ; + rdf:type gnc:GNWikiEntry ; + dct:created ?createTime . + FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) . + { + SELECT (MAX(?vers) AS ?max) ?id_ WHERE { + ?symbolId rdfs:label ?symbolName . + ?uid dct:identifier ?id_ ; + dct:hasVersion ?vers ; + dct:identifier ?id_ ; + gnt:symbol ?symbolId . + FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) . + } + } + ?uid dct:hasVersion ?max ; + dct:identifier ?id_ . + OPTIONAL { ?uid gnt:reason ?reason } . + OPTIONAL { + ?uid gnt:belongsToSpecies ?speciesId . + ?speciesId gnt:shortName ?species . + } . + OPTIONAL { ?uid dct:references ?pubmedId . } . + OPTIONAL { ?uid foaf:homepage ?weburl . } . + OPTIONAL { ?uid gnt:initial ?usercode . } . + OPTIONAL { ?uid gnt:mbox ?email . } . + OPTIONAL { ?uid gnt:belongsToCategory ?category . } . + BIND (str(?version) AS ?versionId) . + BIND (str(?id_) AS ?identifier) . + BIND (str(?pubmedId) AS ?pmid) . + BIND (str(?createTime) AS ?created) . 
+} +""").substitute(prefix=RDF_PREFIXES, symbol=symbol,) + context = BASE_CONTEXT | { + "foaf": "http://xmlns.com/foaf/0.1/", + "dct": "http://purl.org/dc/terms/", + "categories": "gnt:belongsToCategory", + "web_url": "foaf:homepage", + "version": "gnt:hasVersion", + "symbol": "rdfs:label", + "reason": "gnt:reason", + "species": "gnt:species", + "pubmed_id": "dct:references", + "email": "foaf:mbox", + "initial": "gnt:initial", + "comment": "rdfs:comment", + "created": "dct:created", + "id": "dct:identifier", + # This points to the RDF Node which is the unique identifier + # for this triplet. It's constructed using the comment-id and + # the comment-versionId + "wiki_identifier": "@id", + } + results = query_frame_and_compact( + query, context, + sparql_uri + ) + data = results.get("data") + if not data: + return results + return results diff --git a/gn3/db/wiki.py b/gn3/db/wiki.py new file mode 100644 index 0000000..abb1644 --- /dev/null +++ b/gn3/db/wiki.py @@ -0,0 +1,80 @@ +"""Helper functions to access wiki entries""" + +from typing import Dict, List + +from MySQLdb.cursors import DictCursor + + +class MissingDBDataException(Exception): + """Error due to DB missing some data""" + + +def get_latest_comment(connection, comment_id: str) -> int: + """ Latest comment is one with the highest versionId """ + cursor = connection.cursor(DictCursor) + query = """ SELECT versionId AS version, symbol, PubMed_ID AS pubmed_ids, sp.Name AS species, + comment, email, weburl, initial, reason + FROM `GeneRIF` gr + INNER JOIN Species sp USING(SpeciesId) + WHERE gr.Id = %s + ORDER BY versionId DESC LIMIT 1; + """ + cursor.execute(query, (comment_id,)) + result = cursor.fetchone() + result["pubmed_ids"] = [x.strip() for x in result["pubmed_ids"].split()] + categories_query = """ + SELECT grx.GeneRIFId, grx.versionId, gc.Name FROM GeneRIFXRef grx + INNER JOIN GeneCategory gc ON grx.GeneCategoryId=gc.Id + WHERE GeneRIFId = %s AND versionId=%s; + """ + + 
cursor.execute(categories_query, (comment_id, result["version"])) + categories = cursor.fetchall() + result["categories"] = [x["Name"] for x in categories] + return result + + +def get_species_id(cursor, species_name: str) -> int: + """Find species id given species `Name`""" + cursor.execute("SELECT SpeciesID from Species WHERE Name = %s", (species_name,)) + species_ids = cursor.fetchall() + if len(species_ids) != 1: + raise MissingDBDataException( + f"expected 1 species with Name={species_name} but found {len(species_ids)}!" + ) + return species_ids[0][0] + + +def get_next_comment_version(cursor, comment_id: int) -> int: + """Find the version to add, usually latest_version + 1""" + cursor.execute( + "SELECT MAX(versionId) as version_id from GeneRIF WHERE Id = %s", (comment_id,) + ) + latest_version = cursor.fetchone()[0] + if latest_version is None: + raise MissingDBDataException(f"No comment found with comment_id={comment_id}") + return latest_version + 1 + + +def get_categories_ids(cursor, categories: List[str]) -> List[int]: + """Get the categories_ids from a list of category strings""" + dict_cats = get_categories(cursor) + category_ids = [] + for category in set(categories): + cat_id = dict_cats.get(category.strip()) + if cat_id is None: + raise MissingDBDataException(f"Category with Name={category} not found") + category_ids.append(cat_id) + return category_ids + +def get_categories(cursor) -> Dict[str, int]: + cursor.execute("SELECT Name, Id from GeneCategory") + raw_categories = cursor.fetchall() + dict_cats = dict(raw_categories) + return dict_cats + +def get_species(cursor) -> Dict[str, str]: + cursor.execute("SELECT Name, SpeciesName from Species") + raw_species = cursor.fetchall() + dict_cats = dict(raw_species) + return dict_cats diff --git a/gn3/db_utils.py b/gn3/db_utils.py index e4dc81f..0d9bd0a 100644 --- a/gn3/db_utils.py +++ b/gn3/db_utils.py @@ -1,11 +1,15 @@ """module contains all db related stuff""" import contextlib +import logging from 
typing import Any, Iterator, Protocol, Tuple from urllib.parse import urlparse import MySQLdb as mdb import xapian +LOGGER = logging.getLogger(__file__) + + def parse_db_url(sql_uri: str) -> Tuple: """function to parse SQL_URI env variable note:there\ is a default value for SQL_URI so a tuple result is\ @@ -24,7 +28,7 @@ class Connection(Protocol): @contextlib.contextmanager -def database_connection(sql_uri) -> Iterator[Connection]: +def database_connection(sql_uri: str, logger: logging.Logger = LOGGER) -> Iterator[Connection]: """Connect to MySQL database.""" host, user, passwd, db_name, port = parse_db_url(sql_uri) connection = mdb.connect(db=db_name, @@ -35,6 +39,7 @@ def database_connection(sql_uri) -> Iterator[Connection]: try: yield connection except mdb.Error as _mbde: + logger.error("DB error encountered", exc_info=True) connection.rollback() finally: connection.commit() diff --git a/gn3/errors.py b/gn3/errors.py index c53604f..ec7a554 100644 --- a/gn3/errors.py +++ b/gn3/errors.py @@ -15,6 +15,7 @@ from werkzeug.exceptions import NotFound from authlib.oauth2.rfc6749.errors import OAuth2Error from flask import Flask, jsonify, Response, current_app +from gn3.oauth2 import errors as oautherrors from gn3.auth.authorisation.errors import AuthorisationError from gn3.llms.errors import LLMError @@ -28,6 +29,7 @@ def add_trace(exc: Exception, jsonmsg: dict) -> dict: def page_not_found(pnf): """Generic 404 handler.""" + current_app.logger.error("Handling 404 errors", exc_info=True) return jsonify(add_trace(pnf, { "error": pnf.name, "error_description": pnf.description @@ -36,6 +38,7 @@ def page_not_found(pnf): def internal_server_error(pnf): """Generic 404 handler.""" + current_app.logger.error("Handling internal server errors", exc_info=True) return jsonify(add_trace(pnf, { "error": pnf.name, "error_description": pnf.description @@ -44,15 +47,16 @@ def internal_server_error(pnf): def url_server_error(pnf): """Handler for an exception with a url connection.""" + 
current_app.logger.error("Handling url server errors", exc_info=True) return jsonify(add_trace(pnf, { "error": f"URLLib Error no: {pnf.reason.errno}", "error_description": pnf.reason.strerror, - })) + })), 500 def handle_authorisation_error(exc: AuthorisationError): """Handle AuthorisationError if not handled anywhere else.""" - current_app.logger.error(exc) + current_app.logger.error("Handling external auth errors", exc_info=True) return jsonify(add_trace(exc, { "error": type(exc).__name__, "error_description": " :: ".join(exc.args) @@ -61,7 +65,7 @@ def handle_authorisation_error(exc: AuthorisationError): def handle_oauth2_errors(exc: OAuth2Error): """Handle OAuth2Error if not handled anywhere else.""" - current_app.logger.error(exc) + current_app.logger.error("Handling external oauth2 errors", exc_info=True) return jsonify(add_trace(exc, { "error": exc.error, "error_description": exc.description, @@ -70,7 +74,7 @@ def handle_oauth2_errors(exc: OAuth2Error): def handle_sqlite3_errors(exc: OperationalError): """Handle sqlite3 errors if not handled anywhere else.""" - current_app.logger.error(exc) + current_app.logger.error("Handling sqlite3 errors", exc_info=True) return jsonify({ "error": "DatabaseError", "error_description": exc.args[0], @@ -78,24 +82,23 @@ def handle_sqlite3_errors(exc: OperationalError): def handle_sparql_errors(exc): - """Handle sqlite3 errors if not handled anywhere else.""" - current_app.logger.error(exc) - __code = { - EndPointInternalError: 500, - EndPointNotFound: 400, - QueryBadFormed: 400, - Unauthorized: 401, - URITooLong: 414, + """Handle sparql/virtuoso errors if not handled anywhere else.""" + current_app.logger.error("Handling sparql errors", exc_info=True) + code = { + "EndPointInternalError": 500, + "EndPointNotFound": 404, + "QueryBadFormed": 400, + "Unauthorized": 401, + "URITooLong": 414, } return jsonify({ "error": exc.msg, - "error_description": str(exc), - }), __code.get(exc) + }), code.get(exc.__class__.__name__) def 
def require_token(func):
    """Check for and verify bearer token.

    Decorator for Flask views: on success, calls the wrapped view with an
    extra `auth_token` keyword of the shape
    ``{"access_token": <raw token>, "jwt": <decoded claims>}``; otherwise
    responds with a 400 and a JSON error payload.
    """
    @wraps(func)
    def __auth__(*args, **kwargs):
        try:
            bearer = request.headers.get("Authorization", "")
            # Require the "Bearer <token>" shape.  Split at most once:
            # the old unbounded `bearer.split(" ")` raised an uncaught
            # ValueError (HTTP 500) on headers with extra or missing
            # spaces, e.g. "Bearer  abc" or "Bearerabc".
            if bearer.startswith("Bearer "):
                token = bearer.split(" ", 1)[1].strip()
                # validate token and return it
                _jwt = jwks.validate_token(
                    token,
                    jwks.fetch_jwks(app.config["AUTH_SERVER_URL"],
                                    "auth/public-jwks"))
                return func(*args,
                            **{**kwargs,
                               "auth_token": {"access_token": token,
                                              "jwt": _jwt}})
            error_message = "We expected a bearer token but did not get one."
        except TokenValidationError as _tve:
            app.logger.debug("Token validation failed.", exc_info=True)
            error_message = "The token was found to be invalid."

        return jsonify({
            "error": "TokenValidationError",
            "description": error_message
        }), 400

    return __auth__
+ # pylint: disable=W0703 + except Exception as _exc: + app.logger.debug("There was an error fetching the JSON Web Keys.", + exc_info=True) + + return KeySet([]) + + +def validate_token(token: str, keys: KeySet) -> dict: + """Validate the token against the given keys.""" + for key in keys.keys: + try: + return JsonWebToken(["RS256"]).decode(token, key=key) + except BadSignatureError as _bse: + pass + + raise TokenValidationError("No key was found for validation.") diff --git a/gn3/settings.py b/gn3/settings.py index acf3619..1e794ff 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -85,7 +85,7 @@ ROUND_TO = 10 MULTIPROCESSOR_PROCS = 6 # Number of processes to spawn -AUTH_SERVER_URL = "" +AUTH_SERVER_URL = "https://auth.genenetwork.org" AUTH_MIGRATIONS = "migrations/auth" AUTH_DB = os.environ.get( "AUTH_DB", f"{os.environ.get('HOME')}/genenetwork/gn3_files/db/auth.db") @@ -93,8 +93,6 @@ OAUTH2_SCOPE = ( "profile", "group", "role", "resource", "user", "masquerade", "introspect") -GNQA_DB = os.environ.get( - "GNQA_DB", f"{os.environ.get('HOME')}/tmp/gnqa.db") try: # *** SECURITY CONCERN *** |