aboutsummaryrefslogtreecommitdiff
path: root/gn3
diff options
context:
space:
mode:
Diffstat (limited to 'gn3')
-rw-r--r--gn3/api/correlation.py2
-rw-r--r--gn3/api/heatmaps.py2
-rw-r--r--gn3/api/menu.py2
-rw-r--r--gn3/api/metadata.py349
-rw-r--r--gn3/api/metadata_api/wiki.py119
-rw-r--r--gn3/api/rqtl.py4
-rw-r--r--gn3/api/search.py25
-rw-r--r--gn3/case_attributes.py96
-rw-r--r--gn3/db/constants.py152
-rw-r--r--gn3/db/rdf.py126
-rw-r--r--gn3/db/wiki.py80
-rw-r--r--gn3/db_utils.py7
-rw-r--r--gn3/errors.py42
-rw-r--r--gn3/oauth2/__init__.py1
-rw-r--r--gn3/oauth2/authorisation.py34
-rw-r--r--gn3/oauth2/errors.py8
-rw-r--r--gn3/oauth2/jwks.py36
-rw-r--r--gn3/settings.py4
18 files changed, 648 insertions, 441 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index eb4cc7d..c77dd93 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -64,7 +64,7 @@ def compute_lit_corr(species=None, gene_id=None):
might be needed for actual computing of the correlation results
"""
- with database_connection(current_app.config["SQL_URI"]) as conn:
+ with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn:
target_traits_gene_ids = request.get_json()
target_trait_gene_list = list(target_traits_gene_ids.items())
diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py
index 632c54a..172d555 100644
--- a/gn3/api/heatmaps.py
+++ b/gn3/api/heatmaps.py
@@ -24,7 +24,7 @@ def clustered_heatmaps():
return jsonify({
"message": "You need to provide at least two trait names."
}), 400
- with database_connection(current_app.config["SQL_URI"]) as conn:
+ with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn:
def parse_trait_fullname(trait):
name_parts = trait.split(":")
return f"{name_parts[1]}::{name_parts[0]}"
diff --git a/gn3/api/menu.py b/gn3/api/menu.py
index 58b761e..377ac6b 100644
--- a/gn3/api/menu.py
+++ b/gn3/api/menu.py
@@ -10,5 +10,5 @@ menu = Blueprint("menu", __name__)
@menu.route("/generate/json")
def generate_json():
"""Get the menu in the JSON format"""
- with database_connection(current_app.config["SQL_URI"]) as conn:
+ with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn:
return jsonify(gen_dropdown_json(conn))
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index 91dc115..3f28f5d 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -5,7 +5,6 @@ from string import Template
from pathlib import Path
from authlib.jose import jwt
-
from flask import Blueprint
from flask import request
from flask import current_app
@@ -14,135 +13,20 @@ from gn3.auth.authorisation.errors import AuthorisationError
from gn3.db.datasets import (retrieve_metadata,
save_metadata,
get_history)
-from gn3.db.rdf import RDF_PREFIXES
from gn3.db.rdf import (query_frame_and_compact,
- query_and_compact,
- query_and_frame)
-
-
-BASE_CONTEXT = {
- "data": "@graph",
- "id": "@id",
- "type": "@type",
- "gnc": "http://genenetwork.org/category/",
- "gnt": "http://genenetwork.org/term/",
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
- "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
-}
-
-DATASET_CONTEXT = {
- "accessRights": "dct:accessRights",
- "accessionId": "dct:identifier",
- "acknowledgement": "gnt:hasAcknowledgement",
- "altLabel": "skos:altLabel",
- "caseInfo": "gnt:hasCaseInfo",
- "classifiedUnder": "xkos:classifiedUnder",
- "contributors": "dct:creator",
- "contactPoint": "dcat:contactPoint",
- "created": "dct:created",
- "dcat": "http://www.w3.org/ns/dcat#",
- "dct": "http://purl.org/dc/terms/",
- "description": "dct:description",
- "ex": "http://example.org/stuff/1.0/",
- "experimentDesignInfo": "gnt:hasExperimentDesignInfo",
- "experimentType": "gnt:hasExperimentType",
- "foaf": "http://xmlns.com/foaf/0.1/",
- "geoSeriesId": "gnt:hasGeoSeriesId",
- "gnt": "http://genenetwork.org/term/",
- "inbredSet": "gnt:belongsToGroup",
- "label": "rdfs:label",
- "normalization": "gnt:usesNormalization",
- "platformInfo": "gnt:hasPlatformInfo",
- "notes": "gnt:hasNotes",
- "organization": "foaf:Organization",
- "prefLabel": "skos:prefLabel",
- "citation": "dct:isReferencedBy",
- "GoTree": "gnt:hasGOTreeValue",
- "platform": "gnt:usesPlatform",
- "processingInfo": "gnt:hasDataProcessingInfo",
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
- "skos": "http://www.w3.org/2004/02/skos/core#",
- "specifics": "gnt:hasContentInfo",
- "title": "dct:title",
- "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
- "tissueInfo": "gnt:hasTissueInfo",
- "tissue": "gnt:hasTissue",
- "contactWebUrl": "foaf:homepage",
- "contactName": "foaf:name",
-}
-
-SEARCH_CONTEXT = {
- "pages": "ex:pages",
- "hits": "ex:hits",
- "result": "ex:result",
- "results": "ex:items",
- "resultItem": "ex:resultType",
- "currentPage": "ex:currentPage",
-}
-
-DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | {
- "classifiedUnder": "xkos:classifiedUnder",
- "created": "dct:created",
- "dct": "http://purl.org/dc/terms/",
- "ex": "http://example.org/stuff/1.0/",
- "inbredSet": "ex:belongsToInbredSet",
- "title": "dct:title",
- "name": "rdfs:label",
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
- "type": "@type",
- "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
-}
+ query_and_compact)
+from gn3.db.constants import (
+ RDF_PREFIXES, BASE_CONTEXT,
+ DATASET_CONTEXT,
+ DATASET_SEARCH_CONTEXT, PUBLICATION_CONTEXT,
+ PHENOTYPE_CONTEXT
+)
-PUBLICATION_CONTEXT = {
- "dct": "http://purl.org/dc/terms/",
- "fabio": "http://purl.org/spar/fabio/",
- "prism": "http://prismstandard.org/namespaces/basic/2.0/",
- "xsd": "http://www.w3.org/2001/XMLSchema#",
- "title": "dct:title",
- "journal": "fabio:Journal",
- "volume": "prism:volume",
- "page": "fabio:page",
- "creator": "dct:creator",
- "abstract": "dct:abstract",
- "year": {
- "@id": "fabio:hasPublicationYear",
- "@type": "xsd:gYear",
- },
- "month": {
- "@id": "prism:publicationDate",
- "@type": "xsd:gMonth"
- },
-}
+from gn3.api.metadata_api import wiki
-PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | {
- "skos": "http://www.w3.org/2004/02/skos/core#",
- "dcat": "http://www.w3.org/ns/dcat#",
- "prism": "http://prismstandard.org/namespaces/basic/2.0/",
- "traitName": "skos:altLabel",
- "trait": "rdfs:label",
- "altName": "rdfs:altLabel",
- "description": "dct:description",
- "abbreviation": "gnt:abbreviation",
- "labCode": "gnt:labCode",
- "submitter": "gnt:submitter",
- "dataset": "dcat:Distribution",
- "contributor": "dct:contributor",
- "mean": "gnt:mean",
- "locus": "gnt:locus",
- "lodScore": "gnt:lodScore",
- "references": "dct:isReferencedBy",
- "additive": "gnt:additive",
- "sequence": "gnt:sequence",
- "prefLabel": "skos:prefLabel",
- "identifier": "dct:identifier",
- "chromosome": "gnt:chr",
- "mb": "gnt:mb",
- "peakLocation": "gnt:locus",
- "species": "gnt:belongsToSpecies",
- "group": "gnt:belongsToGroup",
-}
metadata = Blueprint("metadata", __name__)
+metadata.register_blueprint(wiki.wiki_blueprint)
@metadata.route("/datasets/<name>", methods=["GET"])
@@ -208,7 +92,7 @@ CONSTRUCT {
(Path(
current_app.config.get("DATA_DIR")
) / "gn-docs/general/datasets" /
- Path(__result.get("id", "")).stem).as_posix()
+ Path(__result.get("id", "")).stem).as_posix()
)
@@ -348,69 +232,6 @@ def edit_dataset():
lambda x: ("Edit successfull", 201)
)
-@metadata.route("/datasets/search/<term>", methods=["GET"])
-def search_datasets(term):
- """Search datasets"""
- args = request.args
- page = args.get("page", 0)
- page_size = args.get("per-page", 10)
- _query = Template("""
-$prefix
-
-CONSTRUCT {
- ex:result rdf:type ex:resultType ;
- ex:pages ?pages ;
- ex:hits ?hits ;
- ex:currentPage $offset ;
- ex:items [
- rdfs:label ?label ;
- dct:title ?title ;
- ex:belongsToInbredSet ?inbredSetName ;
- xkos:classifiedUnder ?datasetType ;
- ]
-} WHERE {
-{
- SELECT DISTINCT ?dataset ?label ?inbredSetName ?datasetType ?title
- WHERE {
- ?dataset rdf:type dcat:Dataset ;
- rdfs:label ?label ;
- ?datasetPredicate ?datasetObject ;
- xkos:classifiedUnder ?inbredSet .
- ?inbredSet ^skos:member gnc:Set ;
- rdfs:label ?inbredSetName .
- ?datasetObject bif:contains "'$term'" .
- OPTIONAL {
- ?dataset dct:title ?title .
- } .
- OPTIONAL {
- ?classification ^xkos:classifiedUnder ?dataset ;
- ^skos:member gnc:DatasetType ;
- ?typePredicate ?typeName ;
- skos:prefLabel ?datasetType .
- }
- } ORDER BY ?dataset LIMIT $limit OFFSET $offset
-}
-
-{
- SELECT (COUNT(DISTINCT ?dataset)/$limit+1 AS ?pages)
- (COUNT(DISTINCT ?dataset) AS ?hits) WHERE {
- ?dataset rdf:type dcat:Dataset ;
- ?p ?o .
- ?o bif:contains "'$term'" .
- }
-}
-
-}
-""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page)
- _context = {
- "@context": BASE_CONTEXT | DATASET_SEARCH_CONTEXT,
- "type": "resultItem",
- }
- return query_frame_and_compact(
- _query, _context,
- current_app.config.get("SPARQL_ENDPOINT")
- )
-
@metadata.route("/publications/<name>", methods=["GET"])
def publications(name):
@@ -436,65 +257,6 @@ CONSTRUCT {
)
-@metadata.route("/publications/search/<term>", methods=["GET"])
-def search_publications(term):
- """Search publications"""
- args = request.args
- page = args.get("page", 0)
- page_size = args.get("per-page", 10)
- _query = Template("""
-$prefix
-
-CONSTRUCT {
- ex:result rdf:type ex:resultType ;
- ex:totalCount ?totalCount ;
- ex:currentPage $offset ;
- ex:items [
- rdfs:label ?publication ;
- dct:title ?title ;
- ]
-} WHERE {
-{
- SELECT ?publication ?title ?pmid WHERE {
- ?pub rdf:type fabio:ResearchPaper ;
- ?predicate ?object ;
- dct:title ?title .
- ?object bif:contains "'$term'" .
- BIND( STR(?pub) AS ?publication ) .
- } ORDER BY ?title LIMIT $limit OFFSET $offset
- }
-{
- SELECT (COUNT(*)/$limit+1 AS ?totalCount) WHERE {
- ?publication rdf:type fabio:ResearchPaper ;
- ?predicate ?object .
- ?object bif:contains "'$term'" .
- }
-}
-}
-""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page)
- _context = {
- "@context": BASE_CONTEXT | SEARCH_CONTEXT | {
- "dct": "http://purl.org/dc/terms/",
- "ex": "http://example.org/stuff/1.0/",
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
- "fabio": "http://purl.org/spar/fabio/",
- "title": "dct:title",
- "pubmed": "fabio:hasPubMedId",
- "currentPage": "ex:currentPage",
- "url": "rdfs:label",
- },
- "type": "resultItem",
- "paper": {
- "@type": "fabio:ResearchPaper",
- "@container": "@index"
- }
- }
- return query_and_frame(
- _query, _context,
- current_app.config.get("SPARQL_ENDPOINT")
- )
-
-
@metadata.route("/phenotypes/<name>", methods=["GET"])
@metadata.route("/phenotypes/<group>/<name>", methods=["GET"])
def phenotypes(name, group=None):
@@ -630,97 +392,6 @@ CONSTRUCT {
)
-@metadata.route("/genewikis/gn/<symbol>", methods=["GET"])
-def get_gn_genewiki_entries(symbol):
- """Fetch the GN and NCBI GeneRIF entries"""
- args = request.args
- page = args.get("page", 0)
- page_size = args.get("per-page", 10)
- _query = Template("""
-$prefix
-
-CONSTRUCT {
- ?symbol ex:entries [
- rdfs:comment ?comment ;
- ex:species ?species_ ;
- dct:created ?createTime ;
- dct:references ?pmids ;
- dct:creator ?creator ;
- gnt:belongsToCategory ?categories ;
- ] .
- ?symbol rdf:type gnc:GNWikiEntry ;
- ex:totalCount ?totalCount ;
- ex:currentPage $offset .
-} WHERE {
-{
- SELECT ?symbol ?comment
- (GROUP_CONCAT(DISTINCT ?speciesName; SEPARATOR='; ') AS ?species_)
- ?createTime ?creator
- (GROUP_CONCAT(DISTINCT ?pubmed; SEPARATOR='; ') AS ?pmids)
- (GROUP_CONCAT(DISTINCT ?category; SEPARATOR='; ') AS ?categories)
- WHERE {
- ?symbol rdfs:label ?label ;
- rdfs:comment _:entry .
- ?label bif:contains "'$symbol'" .
- _:entry rdf:type gnc:GNWikiEntry ;
- rdfs:comment ?comment .
- OPTIONAL {
- ?species ^xkos:classifiedUnder _:entry ;
- ^skos:member gnc:Species ;
- skos:prefLabel ?speciesName .
- } .
- OPTIONAL { _:entry dct:created ?createTime . } .
- OPTIONAL { _:entry dct:references ?pubmed . } .
- OPTIONAL {
- ?investigator foaf:name ?creator ;
- ^dct:creator _:entry .
- } .
- OPTIONAL { _:entry gnt:belongsToCategory ?category . } .
- } GROUP BY ?comment ?symbol ?createTime
- ?creator ORDER BY ?createTime LIMIT $limit OFFSET $offset
-}
-
-{
- SELECT (COUNT(DISTINCT ?comment)/$limit+1 AS ?totalCount) WHERE {
- ?symbol rdfs:comment _:entry ;
- rdfs:label ?label .
- _:entry rdfs:comment ?comment ;
- rdf:type gnc:GNWikiEntry .
- ?label bif:contains "'$symbol'" .
- }
-}
-}
-""").substitute(prefix=RDF_PREFIXES, symbol=symbol,
- limit=page_size, offset=page)
- _context = {
- "@context": BASE_CONTEXT | {
- "ex": "http://example.org/stuff/1.0/",
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
- "gnt": "http://genenetwork.org/term/",
- "gnc": "http://genenetwork.org/category/",
- "dct": "http://purl.org/dc/terms/",
- "xsd": "http://www.w3.org/2001/XMLSchema#",
- "entries": "ex:entries",
- "comment": "rdfs:comment",
- "species": "ex:species",
- "category": 'gnt:belongsToCategory',
- "author": "dct:creator",
- "pubmed": "dct:references",
- "currentPage": "ex:currentPage",
- "pages": "ex:totalCount",
- "created": {
- "@id": "dct:created",
- "@type": "xsd:datetime"
- },
- },
- "type": "gnc:GNWikiEntry"
- }
- return query_frame_and_compact(
- _query, _context,
- current_app.config.get("SPARQL_ENDPOINT")
- )
-
-
@metadata.route("/genewikis/ncbi/<symbol>", methods=["GET"])
def get_ncbi_genewiki_entries(symbol):
"""Fetch the NCBI GeneRIF entries"""
diff --git a/gn3/api/metadata_api/wiki.py b/gn3/api/metadata_api/wiki.py
new file mode 100644
index 0000000..a4abef6
--- /dev/null
+++ b/gn3/api/metadata_api/wiki.py
@@ -0,0 +1,119 @@
+"""API for accessing/editing wiki metadata"""
+
+import datetime
+from typing import Any, Dict
+from flask import Blueprint, request, jsonify, current_app, make_response
+from gn3 import db_utils
+from gn3.db import wiki
+from gn3.db.rdf import (query_frame_and_compact,
+ get_wiki_entries_by_symbol)
+
+
+wiki_blueprint = Blueprint("wiki", __name__, url_prefix="wiki")
+
+
+@wiki_blueprint.route("/<int:comment_id>/edit", methods=["POST"])
+def edit_wiki(comment_id: int):
+ """Edit wiki comment. This is achieved by adding another entry with a new VersionId"""
+ # FIXME: attempt to check and fix for types here with relevant errors
+ payload: Dict[str, Any] = request.json # type: ignore
+ pubmed_ids = [str(x) for x in payload.get("pubmed_ids", [])]
+
+ insert_dict = {
+ "Id": comment_id,
+ "symbol": payload["symbol"],
+ "PubMed_ID": " ".join(pubmed_ids),
+ "comment": payload["comment"],
+ "email": payload["email"],
+ "createtime": datetime.datetime.now(datetime.timezone.utc).strftime(
+ "%Y-%m-%d %H:%M"
+ ),
+ "user_ip": request.environ.get("HTTP_X_REAL_IP", request.remote_addr),
+ "weburl": payload.get("web_url"),
+ "initial": payload.get("initial"),
+ "reason": payload["reason"],
+ }
+
+ insert_query = """
+ INSERT INTO GeneRIF (Id, versionId, symbol, PubMed_ID, SpeciesID, comment,
+ email, createtime, user_ip, weburl, initial, reason)
+ VALUES (%(Id)s, %(versionId)s, %(symbol)s, %(PubMed_ID)s, %(SpeciesID)s, %(comment)s, %(email)s, %(createtime)s, %(user_ip)s, %(weburl)s, %(initial)s, %(reason)s)
+ """
+ with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+ cursor = conn.cursor()
+ try:
+ category_ids = wiki.get_categories_ids(
+ cursor, payload["categories"])
+ species_id = wiki.get_species_id(cursor, payload["species"])
+ next_version = wiki.get_next_comment_version(cursor, comment_id)
+ except wiki.MissingDBDataException as missing_exc:
+            return jsonify(error=f"Error editing wiki entry, {missing_exc}"), 500
+ insert_dict["SpeciesID"] = species_id
+ insert_dict["versionId"] = next_version
+ current_app.logger.debug(f"Running query: {insert_query}")
+ cursor.execute(insert_query, insert_dict)
+ category_addition_query = """
+ INSERT INTO GeneRIFXRef (GeneRIFId, versionId, GeneCategoryId)
+ VALUES (%s, %s, %s)
+ """
+
+ for cat_id in category_ids:
+ current_app.logger.debug(
+ f"Running query: {category_addition_query}")
+ cursor.execute(
+ category_addition_query, (comment_id,
+ insert_dict["versionId"], cat_id)
+ )
+ return jsonify({"success": "ok"})
+ return jsonify(error="Error editing wiki entry, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/<string:symbol>", methods=["GET"])
+def get_wiki_entries(symbol: str):
+ """Fetch wiki entries"""
+ content_type = request.headers.get("Content-Type")
+ status_code = 200
+ response = get_wiki_entries_by_symbol(
+ symbol=symbol,
+ sparql_uri=current_app.config["SPARQL_ENDPOINT"])
+ data = response.get("data")
+ if not data:
+ data = {}
+ status_code = 404
+ if content_type == "application/ld+json":
+ payload = make_response(response)
+ payload.headers["Content-Type"] = "application/ld+json"
+ return payload, status_code
+ return jsonify(data), status_code
+
+
+@wiki_blueprint.route("/<int:comment_id>", methods=["GET"])
+def get_wiki(comment_id: int):
+ """
+ Gets latest wiki comments.
+
+ TODO: fetch this from RIF
+ """
+ with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+ return jsonify(wiki.get_latest_comment(conn, comment_id))
+ return jsonify(error="Error fetching wiki entry, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/categories", methods=["GET"])
+def get_categories():
+ """ Gets list of supported categories for RIF """
+ with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+ cursor = conn.cursor()
+ categories_dict = wiki.get_categories(cursor)
+ return jsonify(categories_dict)
+ return jsonify(error="Error getting categories, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/species", methods=["GET"])
+def get_species():
+ """ Gets list of all species, contains name and SpeciesName """
+ with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+ cursor = conn.cursor()
+ species_dict = wiki.get_species(cursor)
+ return jsonify(species_dict)
+ return jsonify(error="Error getting species, most likely due to DB error!"), 500
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index 70ebe12..ae0110d 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -25,11 +25,11 @@ run the rqtl_wrapper script and return the results as JSON
raise FileNotFoundError
# Split kwargs by those with values and boolean ones that just convert to True/False
- kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"]
+ kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control"]
boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"]
all_kwargs = kwargs + boolean_kwargs
- rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
+ rqtl_kwargs = {"geno": genofile, "pheno": phenofile, "outdir": current_app.config.get("TMPDIR")}
rqtl_bool_kwargs = []
for kwarg in all_kwargs:
if kwarg in request.form:
diff --git a/gn3/api/search.py b/gn3/api/search.py
index c741b15..f696428 100644
--- a/gn3/api/search.py
+++ b/gn3/api/search.py
@@ -194,23 +194,36 @@ def parse_location_field(species_query: xapian.Query,
.maybe(xapian.Query.MatchNothing, make_query))
+def parse_boolean_prefixed_field(prefix: str, query: bytes) -> xapian.Query:
+ """Parse boolean prefixed field and return a xapian query."""
+ # For some reason, xapian does not stem boolean prefixed fields
+ # when the query starts with a capital letter. We need it to stem
+ # always. Hence this function.
+ return xapian.Query(prefix + query.decode("utf-8").lower())
+
+
# pylint: disable=too-many-locals
def parse_query(synteny_files_directory: Path, query: str):
"""Parse search query using GeneNetwork specific field processors."""
queryparser = xapian.QueryParser()
queryparser.set_stemmer(xapian.Stem("en"))
- queryparser.set_stemming_strategy(queryparser.STEM_SOME)
+ queryparser.set_stemming_strategy(queryparser.STEM_ALL_Z)
species_prefix = "XS"
chromosome_prefix = "XC"
queryparser.add_boolean_prefix("author", "A")
queryparser.add_boolean_prefix("species", species_prefix)
- queryparser.add_boolean_prefix("group", "XG")
+ queryparser.add_boolean_prefix("group",
+ FieldProcessor(partial(parse_boolean_prefixed_field, "XG")))
queryparser.add_boolean_prefix("tissue", "XI")
queryparser.add_boolean_prefix("dataset", "XDS")
queryparser.add_boolean_prefix("symbol", "XY")
queryparser.add_boolean_prefix("chr", chromosome_prefix)
queryparser.add_boolean_prefix("peakchr", "XPC")
queryparser.add_prefix("description", "XD")
+ queryparser.add_prefix("rif", "XRF")
+ queryparser.add_prefix("wiki", "XWK")
+ queryparser.add_prefix("RIF", "XRF")
+ queryparser.add_prefix("WIKI", "XWK")
range_prefixes = ["mean", "peak", "position", "peakmb", "additive", "year"]
for i, prefix in enumerate(range_prefixes):
# Treat position specially since it needs its own field processor.
@@ -263,11 +276,13 @@ def search_results():
if page < 1:
abort(404, description="Requested page does not exist")
results_per_page = args.get("per_page", default=100, type=int)
- maximum_results_per_page = 10000
+ maximum_results_per_page = 50000
if results_per_page > maximum_results_per_page:
abort(400, description="Requested too many search results")
-
- query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring)
+ try:
+ query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring)
+ except xapian.QueryParserError as err:
+ return jsonify({"error_type": str(err.get_type()), "error": err.get_msg()}), 400
traits = []
# pylint: disable=invalid-name
with xapian_database(current_app.config["XAPIAN_DB_PATH"]) as db:
diff --git a/gn3/case_attributes.py b/gn3/case_attributes.py
index d973b8e..efc82e9 100644
--- a/gn3/case_attributes.py
+++ b/gn3/case_attributes.py
@@ -26,8 +26,8 @@ from gn3.commands import run_cmd
from gn3.db_utils import Connection, database_connection
+from gn3.oauth2.authorisation import require_token
from gn3.auth.authorisation.errors import AuthorisationError
-from gn3.auth.authorisation.oauth2.resource_server import require_oauth
caseattr = Blueprint("case-attribute", __name__)
@@ -61,8 +61,10 @@ class CAJSONEncoder(json.JSONEncoder):
return json.JSONEncoder.default(self, obj)
def required_access(
- inbredset_id: int, access_levels: tuple[str, ...]) -> Union[
- bool, tuple[str, ...]]:
+ token: dict,
+ inbredset_id: int,
+ access_levels: tuple[str, ...]
+) -> Union[bool, tuple[str, ...]]:
"""Check whether the user has the appropriate access"""
def __species_id__(conn):
with conn.cursor() as cursor:
@@ -71,19 +73,21 @@ def required_access(
(inbredset_id,))
return cursor.fetchone()[0]
try:
- with (require_oauth.acquire("profile resource") as the_token,
- database_connection(current_app.config["SQL_URI"]) as conn):
+ with database_connection(current_app.config["SQL_URI"]) as conn:
result = requests.get(
+ # this section fetches the resource ID from the auth server
urljoin(current_app.config["AUTH_SERVER_URL"],
"auth/resource/inbredset/resource-id"
f"/{__species_id__(conn)}/{inbredset_id}"))
if result.status_code == 200:
resource_id = result.json()["resource-id"]
auth = requests.post(
+ # this section fetches the authorisations/privileges that
+ # the current user has on the resource we got above
urljoin(current_app.config["AUTH_SERVER_URL"],
"auth/resource/authorisation"),
json={"resource-ids": [resource_id]},
- headers={"Authorization": f"Bearer {the_token.access_token}"})
+ headers={"Authorization": f"Bearer {token['access_token']}"})
if auth.status_code == 200:
privs = tuple(priv["privilege_id"]
for role in auth.json()[resource_id]["roles"]
@@ -398,14 +402,15 @@ def __apply_deletions__(
params)
def __apply_diff__(
- conn: Connection, inbredset_id: int, diff_filename, the_diff) -> None:
+ conn: Connection, auth_token, inbredset_id: int, diff_filename, the_diff) -> None:
"""
Apply the changes in the diff at `diff_filename` to the data in the database
if the user has appropriate privileges.
"""
- required_access(
- inbredset_id, ("system:inbredset:edit-case-attribute",
- "system:inbredset:apply-case-attribute-edit"))
+ required_access(auth_token,
+ inbredset_id,
+ ("system:inbredset:edit-case-attribute",
+ "system:inbredset:apply-case-attribute-edit"))
diffs = the_diff["diff"]
with conn.cursor(cursorclass=DictCursor) as cursor:
# __apply_additions__(cursor, inbredset_id, diffs["Additions"])
@@ -419,6 +424,7 @@ def __apply_diff__(
os.rename(diff_filename, new_path)
def __reject_diff__(conn: Connection,
+ auth_token: dict,
inbredset_id: int,
diff_filename: Path,
diff: dict) -> Path:
@@ -426,38 +432,45 @@ def __reject_diff__(conn: Connection,
Reject the changes in the diff at `diff_filename` to the data in the
database if the user has appropriate privileges.
"""
- required_access(
- inbredset_id, ("system:inbredset:edit-case-attribute",
- "system:inbredset:apply-case-attribute-edit"))
+ required_access(auth_token,
+ inbredset_id,
+ ("system:inbredset:edit-case-attribute",
+ "system:inbredset:apply-case-attribute-edit"))
__save_diff__(conn, diff, EditStatus.rejected)
new_path = Path(diff_filename.parent, f"{diff_filename.stem}-rejected{diff_filename.suffix}")
os.rename(diff_filename, new_path)
return diff_filename
@caseattr.route("/<int:inbredset_id>/add", methods=["POST"])
-def add_case_attributes(inbredset_id: int) -> Response:
+@require_token
+def add_case_attributes(inbredset_id: int, auth_token=None) -> Response:
"""Add a new case attribute for `InbredSetId`."""
- required_access(inbredset_id, ("system:inbredset:create-case-attribute",))
- with (require_oauth.acquire("profile resource") as the_token, # pylint: disable=[unused-variable]
- database_connection(current_app.config["SQL_URI"]) as conn): # pylint: disable=[unused-variable]
+ required_access(
+ auth_token, inbredset_id, ("system:inbredset:create-case-attribute",))
+ with database_connection(current_app.config["SQL_URI"]) as conn: # pylint: disable=[unused-variable]
raise NotImplementedError
@caseattr.route("/<int:inbredset_id>/delete", methods=["POST"])
-def delete_case_attributes(inbredset_id: int) -> Response:
+@require_token
+def delete_case_attributes(inbredset_id: int, auth_token=None) -> Response:
"""Delete a case attribute from `InbredSetId`."""
- required_access(inbredset_id, ("system:inbredset:delete-case-attribute",))
- with (require_oauth.acquire("profile resource") as the_token, # pylint: disable=[unused-variable]
- database_connection(current_app.config["SQL_URI"]) as conn): # pylint: disable=[unused-variable]
+ required_access(
+ auth_token, inbredset_id, ("system:inbredset:delete-case-attribute",))
+ with database_connection(current_app.config["SQL_URI"]) as conn: # pylint: disable=[unused-variable]
raise NotImplementedError
@caseattr.route("/<int:inbredset_id>/edit", methods=["POST"])
-def edit_case_attributes(inbredset_id: int) -> Response:
- """Edit the case attributes for `InbredSetId` based on data received."""
- with (require_oauth.acquire("profile resource") as the_token,
- database_connection(current_app.config["SQL_URI"]) as conn):
- required_access(inbredset_id,
+@require_token
+def edit_case_attributes(inbredset_id: int, auth_token = None) -> Response:
+ """Edit the case attributes for `InbredSetId` based on data received.
+
+ :inbredset_id: Identifier for the population that the case attribute belongs
+ :auth_token: A validated JWT from the auth server
+ """
+ with database_connection(current_app.config["SQL_URI"]) as conn:
+ required_access(auth_token,
+ inbredset_id,
("system:inbredset:edit-case-attribute",))
- user = the_token.user
fieldnames = tuple(["Strain"] + sorted(
attr["Name"] for attr in
__case_attribute_labels_by_inbred_set__(conn, inbredset_id)))
@@ -465,7 +478,7 @@ def edit_case_attributes(inbredset_id: int) -> Response:
diff_filename = __queue_diff__(
conn, {
"inbredset_id": inbredset_id,
- "user_id": str(user.user_id),
+ "user_id": auth_token["jwt"]["sub"],
"fieldnames": fieldnames,
"diff": __compute_diff__(
fieldnames,
@@ -488,8 +501,11 @@ def edit_case_attributes(inbredset_id: int) -> Response:
return response
try:
- __apply_diff__(
- conn, inbredset_id, diff_filename, __load_diff__(diff_filename))
+ __apply_diff__(conn,
+ auth_token,
+ inbredset_id,
+ diff_filename,
+ __load_diff__(diff_filename))
return jsonify({
"diff-status": "applied",
"message": ("The changes to the case-attributes have been "
@@ -555,37 +571,45 @@ def list_diffs(inbredset_id: int) -> Response:
return resp
@caseattr.route("/approve/<path:filename>", methods=["POST"])
-def approve_case_attributes_diff(filename: str) -> Response:
+@require_token
+def approve_case_attributes_diff(filename: str, auth_token = None) -> Response:
"""Approve the changes to the case attributes in the diff."""
diff_dir = Path(current_app.config["TMPDIR"], CATTR_DIFFS_DIR)
diff_filename = Path(diff_dir, filename)
the_diff = __load_diff__(diff_filename)
with database_connection(current_app.config["SQL_URI"]) as conn:
- __apply_diff__(conn, the_diff["inbredset_id"], diff_filename, the_diff)
+ __apply_diff__(conn, auth_token, the_diff["inbredset_id"], diff_filename, the_diff)
return jsonify({
"message": "Applied the diff successfully.",
"diff_filename": diff_filename.name
})
@caseattr.route("/reject/<path:filename>", methods=["POST"])
-def reject_case_attributes_diff(filename: str) -> Response:
+@require_token
+def reject_case_attributes_diff(filename: str, auth_token=None) -> Response:
"""Reject the changes to the case attributes in the diff."""
diff_dir = Path(current_app.config["TMPDIR"], CATTR_DIFFS_DIR)
diff_filename = Path(diff_dir, filename)
the_diff = __load_diff__(diff_filename)
with database_connection(current_app.config["SQL_URI"]) as conn:
- __reject_diff__(conn, the_diff["inbredset_id"], diff_filename, the_diff)
+ __reject_diff__(conn,
+ auth_token,
+ the_diff["inbredset_id"],
+ diff_filename,
+ the_diff)
return jsonify({
"message": "Rejected diff successfully",
"diff_filename": diff_filename.name
})
@caseattr.route("/<int:inbredset_id>/diff/<int:diff_id>/view", methods=["GET"])
-def view_diff(inbredset_id: int, diff_id: int) -> Response:
+@require_token
+def view_diff(inbredset_id: int, diff_id: int, auth_token=None) -> Response:
"""View a diff."""
with (database_connection(current_app.config["SQL_URI"]) as conn,
conn.cursor(cursorclass=DictCursor) as cursor):
- required_access(inbredset_id, ("system:inbredset:view-case-attribute",))
+ required_access(
+ auth_token, inbredset_id, ("system:inbredset:view-case-attribute",))
cursor.execute(
"SELECT * FROM caseattributes_audit WHERE id=%s",
(diff_id,))
diff --git a/gn3/db/constants.py b/gn3/db/constants.py
new file mode 100644
index 0000000..45e3bfc
--- /dev/null
+++ b/gn3/db/constants.py
@@ -0,0 +1,152 @@
+"""
+This module contains some constants used in other modules.
+"""
+PREFIXES = {
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "dct": "http://purl.org/dc/terms/",
+ "ex": "http://example.org/stuff/1.0/",
+ "fabio": "http://purl.org/spar/fabio/",
+ "foaf": "http://xmlns.com/foaf/0.1/",
+ "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=",
+ "genotype": "http://genenetwork.org/genotype/",
+ "gn": "http://genenetwork.org/id/",
+ "gnc": "http://genenetwork.org/category/",
+ "gnt": "http://genenetwork.org/term/",
+ "owl": "http://www.w3.org/2002/07/owl#",
+ "phenotype": "http://genenetwork.org/phenotype/",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "publication": "http://genenetwork.org/publication/",
+ "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/",
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=",
+ "up": "http://purl.uniprot.org/core/",
+ "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+}
+
+RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>"
+ for key, value in PREFIXES.items()])
+
+BASE_CONTEXT = {
+ "data": "@graph",
+ "type": "@type",
+ "gn": "http://genenetwork.org/id/",
+ "gnc": "http://genenetwork.org/category/",
+ "gnt": "http://genenetwork.org/term/",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+}
+
+DATASET_CONTEXT = {
+ "accessRights": "dct:accessRights",
+ "accessionId": "dct:identifier",
+ "acknowledgement": "gnt:hasAcknowledgement",
+ "altLabel": "skos:altLabel",
+ "caseInfo": "gnt:hasCaseInfo",
+ "classifiedUnder": "xkos:classifiedUnder",
+ "contributors": "dct:creator",
+ "contactPoint": "dcat:contactPoint",
+ "created": "dct:created",
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "dct": "http://purl.org/dc/terms/",
+ "description": "dct:description",
+ "ex": "http://example.org/stuff/1.0/",
+ "experimentDesignInfo": "gnt:hasExperimentDesignInfo",
+ "experimentType": "gnt:hasExperimentType",
+ "foaf": "http://xmlns.com/foaf/0.1/",
+ "geoSeriesId": "gnt:hasGeoSeriesId",
+ "gnt": "http://genenetwork.org/term/",
+ "inbredSet": "gnt:belongsToGroup",
+ "label": "rdfs:label",
+ "normalization": "gnt:usesNormalization",
+ "platformInfo": "gnt:hasPlatformInfo",
+ "notes": "gnt:hasNotes",
+ "organization": "foaf:Organization",
+ "prefLabel": "skos:prefLabel",
+ "citation": "dct:isReferencedBy",
+ "GoTree": "gnt:hasGOTreeValue",
+ "platform": "gnt:usesPlatform",
+ "processingInfo": "gnt:hasDataProcessingInfo",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "specifics": "gnt:hasContentInfo",
+ "title": "dct:title",
+ "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+ "tissueInfo": "gnt:hasTissueInfo",
+ "tissue": "gnt:hasTissue",
+ "contactWebUrl": "foaf:homepage",
+ "contactName": "foaf:name",
+}
+
+SEARCH_CONTEXT = {
+ "pages": "ex:pages",
+ "hits": "ex:hits",
+ "result": "ex:result",
+ "results": "ex:items",
+ "resultItem": "ex:resultType",
+ "currentPage": "ex:currentPage",
+}
+
+DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | {
+ "classifiedUnder": "xkos:classifiedUnder",
+ "created": "dct:created",
+ "dct": "http://purl.org/dc/terms/",
+ "ex": "http://example.org/stuff/1.0/",
+ "inbredSet": "ex:belongsToInbredSet",
+ "title": "dct:title",
+ "name": "rdfs:label",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "type": "@type",
+ "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+}
+
+PUBLICATION_CONTEXT = {
+ "dct": "http://purl.org/dc/terms/",
+ "fabio": "http://purl.org/spar/fabio/",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+ "title": "dct:title",
+ "journal": "fabio:Journal",
+ "volume": "prism:volume",
+ "page": "fabio:page",
+ "creator": "dct:creator",
+ "abstract": "dct:abstract",
+ "year": {
+ "@id": "fabio:hasPublicationYear",
+ "@type": "xsd:gYear",
+ },
+ "month": {
+ "@id": "prism:publicationDate",
+ "@type": "xsd:gMonth"
+ },
+}
+
+PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | {
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "traitName": "skos:altLabel",
+ "trait": "rdfs:label",
+ "altName": "rdfs:altLabel",
+ "description": "dct:description",
+ "abbreviation": "gnt:abbreviation",
+ "labCode": "gnt:labCode",
+ "submitter": "gnt:submitter",
+ "dataset": "dcat:Distribution",
+ "contributor": "dct:contributor",
+ "mean": "gnt:mean",
+ "locus": "gnt:locus",
+ "lodScore": "gnt:lodScore",
+ "references": "dct:isReferencedBy",
+ "additive": "gnt:additive",
+ "sequence": "gnt:sequence",
+ "prefLabel": "skos:prefLabel",
+ "identifier": "dct:identifier",
+ "chromosome": "gnt:chr",
+ "mb": "gnt:mb",
+ "peakLocation": "gnt:locus",
+ "species": "gnt:belongsToSpecies",
+ "group": "gnt:belongsToGroup",
+}
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index eb4014a..5a95683 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -4,39 +4,12 @@ This module is a collection of functions that handle SPARQL queries.
"""
import json
-
+from string import Template
from SPARQLWrapper import SPARQLWrapper
from pyld import jsonld # type: ignore
-
-
-PREFIXES = {
- "dcat": "http://www.w3.org/ns/dcat#",
- "dct": "http://purl.org/dc/terms/",
- "ex": "http://example.org/stuff/1.0/",
- "fabio": "http://purl.org/spar/fabio/",
- "foaf": "http://xmlns.com/foaf/0.1/",
- "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=",
- "genotype": "http://genenetwork.org/genotype/",
- "gn": "http://genenetwork.org/id/",
- "gnc": "http://genenetwork.org/category/",
- "gnt": "http://genenetwork.org/term/",
- "owl": "http://www.w3.org/2002/07/owl#",
- "phenotype": "http://genenetwork.org/phenotype/",
- "prism": "http://prismstandard.org/namespaces/basic/2.0/",
- "publication": "http://genenetwork.org/publication/",
- "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/",
- "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
- "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
- "skos": "http://www.w3.org/2004/02/skos/core#",
- "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=",
- "up": "http://purl.uniprot.org/core/",
- "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
- "xsd": "http://www.w3.org/2001/XMLSchema#",
-}
-
-
-RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>"
- for key, value in PREFIXES.items()])
+from gn3.db.constants import (
+ RDF_PREFIXES, BASE_CONTEXT
+)
def sparql_construct_query(query: str, endpoint: str) -> dict:
@@ -51,22 +24,101 @@ def sparql_construct_query(query: str, endpoint: str) -> dict:
def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict:
"""Frame and then compact the results given a context"""
results = sparql_construct_query(query, endpoint)
- if not results:
- return {}
return jsonld.compact(jsonld.frame(results, context), context)
def query_and_compact(query: str, context: dict, endpoint: str) -> dict:
"""Compact the results given a context"""
results = sparql_construct_query(query, endpoint)
- if not results:
- return {}
return jsonld.compact(results, context)
def query_and_frame(query: str, context: dict, endpoint: str) -> dict:
"""Frame the results given a context"""
results = sparql_construct_query(query, endpoint)
- if not results:
- return {}
return jsonld.frame(results, context)
+
+
+def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
+ """Fetch all the Wiki entries using the symbol"""
+ # This query uses a sub-query to fetch the latest comment by the
+ # version id.
+ query = Template("""
+$prefix
+
+CONSTRUCT {
+ ?uid rdfs:label ?symbolName;
+ gnt:reason ?reason ;
+ gnt:species ?species ;
+ dct:references ?pmid ;
+ foaf:homepage ?weburl ;
+ rdfs:comment ?comment ;
+ foaf:mbox ?email ;
+ gnt:initial ?usercode ;
+ gnt:belongsToCategory ?category ;
+ gnt:hasVersion ?versionId ;
+ dct:created ?created ;
+ dct:identifier ?identifier .
+} WHERE {
+ ?symbolId rdfs:label ?symbolName .
+ ?uid rdfs:comment ?comment ;
+ gnt:symbol ?symbolId ;
+ rdf:type gnc:GNWikiEntry ;
+ dct:created ?createTime .
+ FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+ {
+ SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
+ ?symbolId rdfs:label ?symbolName .
+ ?uid dct:identifier ?id_ ;
+ dct:hasVersion ?vers ;
+ dct:identifier ?id_ ;
+ gnt:symbol ?symbolId .
+ FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+ }
+ }
+ ?uid dct:hasVersion ?max ;
+ dct:identifier ?id_ .
+ OPTIONAL { ?uid gnt:reason ?reason } .
+ OPTIONAL {
+ ?uid gnt:belongsToSpecies ?speciesId .
+ ?speciesId gnt:shortName ?species .
+ } .
+ OPTIONAL { ?uid dct:references ?pubmedId . } .
+ OPTIONAL { ?uid foaf:homepage ?weburl . } .
+ OPTIONAL { ?uid gnt:initial ?usercode . } .
+ OPTIONAL { ?uid gnt:mbox ?email . } .
+ OPTIONAL { ?uid gnt:belongsToCategory ?category . } .
+  BIND (str(?max) AS ?versionId) .
+ BIND (str(?id_) AS ?identifier) .
+ BIND (str(?pubmedId) AS ?pmid) .
+ BIND (str(?createTime) AS ?created) .
+}
+""").substitute(prefix=RDF_PREFIXES, symbol=symbol,)
+ context = BASE_CONTEXT | {
+ "foaf": "http://xmlns.com/foaf/0.1/",
+ "dct": "http://purl.org/dc/terms/",
+ "categories": "gnt:belongsToCategory",
+ "web_url": "foaf:homepage",
+ "version": "gnt:hasVersion",
+ "symbol": "rdfs:label",
+ "reason": "gnt:reason",
+ "species": "gnt:species",
+ "pubmed_id": "dct:references",
+ "email": "foaf:mbox",
+ "initial": "gnt:initial",
+ "comment": "rdfs:comment",
+ "created": "dct:created",
+ "id": "dct:identifier",
+ # This points to the RDF Node which is the unique identifier
+ # for this triplet. It's constructed using the comment-id and
+ # the comment-versionId
+ "wiki_identifier": "@id",
+ }
+ results = query_frame_and_compact(
+ query, context,
+ sparql_uri
+ )
+    return results
diff --git a/gn3/db/wiki.py b/gn3/db/wiki.py
new file mode 100644
index 0000000..abb1644
--- /dev/null
+++ b/gn3/db/wiki.py
@@ -0,0 +1,80 @@
+"""Helper functions to access wiki entries"""
+
+from typing import Dict, List
+
+from MySQLdb.cursors import DictCursor
+
+
+class MissingDBDataException(Exception):
+ """Error due to DB missing some data"""
+
+
+def get_latest_comment(connection, comment_id: str) -> dict:
+ """ Latest comment is one with the highest versionId """
+ cursor = connection.cursor(DictCursor)
+ query = """ SELECT versionId AS version, symbol, PubMed_ID AS pubmed_ids, sp.Name AS species,
+ comment, email, weburl, initial, reason
+ FROM `GeneRIF` gr
+ INNER JOIN Species sp USING(SpeciesId)
+ WHERE gr.Id = %s
+ ORDER BY versionId DESC LIMIT 1;
+ """
+ cursor.execute(query, (comment_id,))
+ result = cursor.fetchone()
+    result["pubmed_ids"] = [x.strip() for x in (result["pubmed_ids"] or "").split()]
+ categories_query = """
+ SELECT grx.GeneRIFId, grx.versionId, gc.Name FROM GeneRIFXRef grx
+ INNER JOIN GeneCategory gc ON grx.GeneCategoryId=gc.Id
+ WHERE GeneRIFId = %s AND versionId=%s;
+ """
+
+ cursor.execute(categories_query, (comment_id, result["version"]))
+ categories = cursor.fetchall()
+ result["categories"] = [x["Name"] for x in categories]
+ return result
+
+
+def get_species_id(cursor, species_name: str) -> int:
+ """Find species id given species `Name`"""
+ cursor.execute("SELECT SpeciesID from Species WHERE Name = %s", (species_name,))
+ species_ids = cursor.fetchall()
+ if len(species_ids) != 1:
+ raise MissingDBDataException(
+ f"expected 1 species with Name={species_name} but found {len(species_ids)}!"
+ )
+ return species_ids[0][0]
+
+
+def get_next_comment_version(cursor, comment_id: int) -> int:
+ """Find the version to add, usually latest_version + 1"""
+ cursor.execute(
+ "SELECT MAX(versionId) as version_id from GeneRIF WHERE Id = %s", (comment_id,)
+ )
+ latest_version = cursor.fetchone()[0]
+ if latest_version is None:
+ raise MissingDBDataException(f"No comment found with comment_id={comment_id}")
+ return latest_version + 1
+
+
+def get_categories_ids(cursor, categories: List[str]) -> List[int]:
+ """Get the categories_ids from a list of category strings"""
+ dict_cats = get_categories(cursor)
+ category_ids = []
+ for category in set(categories):
+ cat_id = dict_cats.get(category.strip())
+ if cat_id is None:
+ raise MissingDBDataException(f"Category with Name={category} not found")
+ category_ids.append(cat_id)
+ return category_ids
+
+def get_categories(cursor) -> Dict[str, int]:
+    """Fetch the mapping of category `Name` to `Id` from GeneCategory."""
+ cursor.execute("SELECT Name, Id from GeneCategory")
+ raw_categories = cursor.fetchall()
+ dict_cats = dict(raw_categories)
+ return dict_cats
+
+def get_species(cursor) -> Dict[str, str]:
+    """Fetch the mapping of species `Name` to `SpeciesName` from Species."""
+ cursor.execute("SELECT Name, SpeciesName from Species")
+ raw_species = cursor.fetchall()
+ dict_cats = dict(raw_species)
+ return dict_cats
diff --git a/gn3/db_utils.py b/gn3/db_utils.py
index e4dc81f..0d9bd0a 100644
--- a/gn3/db_utils.py
+++ b/gn3/db_utils.py
@@ -1,11 +1,15 @@
"""module contains all db related stuff"""
import contextlib
+import logging
from typing import Any, Iterator, Protocol, Tuple
from urllib.parse import urlparse
import MySQLdb as mdb
import xapian
+LOGGER = logging.getLogger(__name__)
+
+
def parse_db_url(sql_uri: str) -> Tuple:
"""function to parse SQL_URI env variable note:there\
is a default value for SQL_URI so a tuple result is\
@@ -24,7 +28,7 @@ class Connection(Protocol):
@contextlib.contextmanager
-def database_connection(sql_uri) -> Iterator[Connection]:
+def database_connection(sql_uri: str, logger: logging.Logger = LOGGER) -> Iterator[Connection]:
"""Connect to MySQL database."""
host, user, passwd, db_name, port = parse_db_url(sql_uri)
connection = mdb.connect(db=db_name,
@@ -35,6 +39,7 @@ def database_connection(sql_uri) -> Iterator[Connection]:
try:
yield connection
except mdb.Error as _mbde:
+ logger.error("DB error encountered", exc_info=True)
connection.rollback()
finally:
connection.commit()
diff --git a/gn3/errors.py b/gn3/errors.py
index c53604f..ec7a554 100644
--- a/gn3/errors.py
+++ b/gn3/errors.py
@@ -15,6 +15,7 @@ from werkzeug.exceptions import NotFound
from authlib.oauth2.rfc6749.errors import OAuth2Error
from flask import Flask, jsonify, Response, current_app
+from gn3.oauth2 import errors as oautherrors
from gn3.auth.authorisation.errors import AuthorisationError
from gn3.llms.errors import LLMError
@@ -28,6 +29,7 @@ def add_trace(exc: Exception, jsonmsg: dict) -> dict:
def page_not_found(pnf):
"""Generic 404 handler."""
+ current_app.logger.error("Handling 404 errors", exc_info=True)
return jsonify(add_trace(pnf, {
"error": pnf.name,
"error_description": pnf.description
@@ -36,6 +38,7 @@ def page_not_found(pnf):
def internal_server_error(pnf):
"""Generic 404 handler."""
+ current_app.logger.error("Handling internal server errors", exc_info=True)
return jsonify(add_trace(pnf, {
"error": pnf.name,
"error_description": pnf.description
@@ -44,15 +47,16 @@ def internal_server_error(pnf):
def url_server_error(pnf):
"""Handler for an exception with a url connection."""
+ current_app.logger.error("Handling url server errors", exc_info=True)
return jsonify(add_trace(pnf, {
"error": f"URLLib Error no: {pnf.reason.errno}",
"error_description": pnf.reason.strerror,
- }))
+ })), 500
def handle_authorisation_error(exc: AuthorisationError):
"""Handle AuthorisationError if not handled anywhere else."""
- current_app.logger.error(exc)
+ current_app.logger.error("Handling external auth errors", exc_info=True)
return jsonify(add_trace(exc, {
"error": type(exc).__name__,
"error_description": " :: ".join(exc.args)
@@ -61,7 +65,7 @@ def handle_authorisation_error(exc: AuthorisationError):
def handle_oauth2_errors(exc: OAuth2Error):
"""Handle OAuth2Error if not handled anywhere else."""
- current_app.logger.error(exc)
+ current_app.logger.error("Handling external oauth2 errors", exc_info=True)
return jsonify(add_trace(exc, {
"error": exc.error,
"error_description": exc.description,
@@ -70,7 +74,7 @@ def handle_oauth2_errors(exc: OAuth2Error):
def handle_sqlite3_errors(exc: OperationalError):
"""Handle sqlite3 errors if not handled anywhere else."""
- current_app.logger.error(exc)
+ current_app.logger.error("Handling sqlite3 errors", exc_info=True)
return jsonify({
"error": "DatabaseError",
"error_description": exc.args[0],
@@ -78,24 +82,23 @@ def handle_sqlite3_errors(exc: OperationalError):
def handle_sparql_errors(exc):
- """Handle sqlite3 errors if not handled anywhere else."""
- current_app.logger.error(exc)
- __code = {
- EndPointInternalError: 500,
- EndPointNotFound: 400,
- QueryBadFormed: 400,
- Unauthorized: 401,
- URITooLong: 414,
+ """Handle sparql/virtuoso errors if not handled anywhere else."""
+ current_app.logger.error("Handling sparql errors", exc_info=True)
+ code = {
+ "EndPointInternalError": 500,
+ "EndPointNotFound": 404,
+ "QueryBadFormed": 400,
+ "Unauthorized": 401,
+ "URITooLong": 414,
}
return jsonify({
"error": exc.msg,
- "error_description": str(exc),
- }), __code.get(exc)
+ }), code.get(exc.__class__.__name__)
def handle_generic(exc: Exception) -> Response:
"""Handle generic exception."""
- current_app.logger.error(exc)
+ current_app.logger.error("Handling generic errors", exc_info=True)
resp = jsonify({
"error": type(exc).__name__,
"error_description": (
@@ -106,6 +109,15 @@ def handle_generic(exc: Exception) -> Response:
return resp
+def handle_local_authorisation_errors(exc: oautherrors.AuthorisationError):
+ """Handle errors relating to authorisation that are raised locally."""
+ current_app.logger.error("Handling local auth errors", exc_info=True)
+ return jsonify(add_trace(exc, {
+ "error": type(exc).__name__,
+ "error_description": " ".join(exc.args)
+ })), 400
+
+
def handle_llm_error(exc: Exception) -> Response:
""" Handle llm erros if not handled anywhere else. """
current_app.logger.error(exc)
diff --git a/gn3/oauth2/__init__.py b/gn3/oauth2/__init__.py
new file mode 100644
index 0000000..8001d34
--- /dev/null
+++ b/gn3/oauth2/__init__.py
@@ -0,0 +1 @@
+"""Package to handle OAuth2 authorisation and other issues."""
diff --git a/gn3/oauth2/authorisation.py b/gn3/oauth2/authorisation.py
new file mode 100644
index 0000000..b2dd1ae
--- /dev/null
+++ b/gn3/oauth2/authorisation.py
@@ -0,0 +1,34 @@
+"""Handle authorisation with auth server."""
+from functools import wraps
+
+from flask import request, jsonify, current_app as app
+
+from gn3.oauth2 import jwks
+from gn3.oauth2.errors import TokenValidationError
+
+
+def require_token(func):
+ """Check for and verify bearer token."""
+ @wraps(func)
+ def __auth__(*args, **kwargs):
+ try:
+ bearer = request.headers.get("Authorization", "")
+ if bearer.startswith("Bearer"):
+ # validate token and return it
+                _extra, token = [item.strip() for item in bearer.split(" ", 1)]
+ _jwt = jwks.validate_token(
+ token,
+ jwks.fetch_jwks(app.config["AUTH_SERVER_URL"],
+ "auth/public-jwks"))
+ return func(*args, **{**kwargs, "auth_token": {"access_token": token, "jwt": _jwt}})
+ error_message = "We expected a bearer token but did not get one."
+ except TokenValidationError as _tve:
+ app.logger.debug("Token validation failed.", exc_info=True)
+ error_message = "The token was found to be invalid."
+
+ return jsonify({
+ "error": "TokenValidationError",
+ "description": error_message
+ }), 400
+
+ return __auth__
diff --git a/gn3/oauth2/errors.py b/gn3/oauth2/errors.py
new file mode 100644
index 0000000..f8cfd2c
--- /dev/null
+++ b/gn3/oauth2/errors.py
@@ -0,0 +1,8 @@
+"""List of possible errors."""
+
+class AuthorisationError(Exception):
+ """Top-level error class dealing with generic authorisation errors."""
+
+
+class TokenValidationError(AuthorisationError):
+ """Class to indicate that token validation failed."""
diff --git a/gn3/oauth2/jwks.py b/gn3/oauth2/jwks.py
new file mode 100644
index 0000000..8798a3f
--- /dev/null
+++ b/gn3/oauth2/jwks.py
@@ -0,0 +1,36 @@
+"""Utilities dealing with JSON Web Keys (JWK)"""
+from urllib.parse import urljoin
+
+import requests
+from flask import current_app as app
+from authlib.jose.errors import BadSignatureError
+from authlib.jose import KeySet, JsonWebKey, JsonWebToken
+
+from gn3.oauth2.errors import TokenValidationError
+
+
+def fetch_jwks(authserveruri: str, path: str = "auth/public-jwks") -> KeySet:
+ """Fetch the JWKs from a particular URI"""
+ try:
+ response = requests.get(urljoin(authserveruri, path))
+ if response.status_code == 200:
+ return KeySet([
+ JsonWebKey.import_key(key) for key in response.json()["jwks"]])
+ # XXXX: TODO: Catch specific exception we need.
+ # pylint: disable=W0703
+ except Exception as _exc:
+ app.logger.debug("There was an error fetching the JSON Web Keys.",
+ exc_info=True)
+
+ return KeySet([])
+
+
+def validate_token(token: str, keys: KeySet) -> dict:
+ """Validate the token against the given keys."""
+ for key in keys.keys:
+ try:
+ return JsonWebToken(["RS256"]).decode(token, key=key)
+ except BadSignatureError as _bse:
+ pass
+
+ raise TokenValidationError("No key was found for validation.")
diff --git a/gn3/settings.py b/gn3/settings.py
index acf3619..1e794ff 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -85,7 +85,7 @@ ROUND_TO = 10
MULTIPROCESSOR_PROCS = 6 # Number of processes to spawn
-AUTH_SERVER_URL = ""
+AUTH_SERVER_URL = "https://auth.genenetwork.org"
AUTH_MIGRATIONS = "migrations/auth"
AUTH_DB = os.environ.get(
"AUTH_DB", f"{os.environ.get('HOME')}/genenetwork/gn3_files/db/auth.db")
@@ -93,8 +93,6 @@ OAUTH2_SCOPE = (
"profile", "group", "role", "resource", "user", "masquerade",
"introspect")
-GNQA_DB = os.environ.get(
- "GNQA_DB", f"{os.environ.get('HOME')}/tmp/gnqa.db")
try:
# *** SECURITY CONCERN ***