diff options
Diffstat (limited to 'gn3/api')
-rw-r--r-- | gn3/api/correlation.py | 2 | ||||
-rw-r--r-- | gn3/api/heatmaps.py | 2 | ||||
-rw-r--r-- | gn3/api/menu.py | 2 | ||||
-rw-r--r-- | gn3/api/metadata.py | 349 | ||||
-rw-r--r-- | gn3/api/metadata_api/wiki.py | 119 | ||||
-rw-r--r-- | gn3/api/rqtl.py | 4 | ||||
-rw-r--r-- | gn3/api/search.py | 25 |
7 files changed, 154 insertions, 349 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index eb4cc7d..c77dd93 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -64,7 +64,7 @@ def compute_lit_corr(species=None, gene_id=None): might be needed for actual computing of the correlation results """ - with database_connection(current_app.config["SQL_URI"]) as conn: + with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn: target_traits_gene_ids = request.get_json() target_trait_gene_list = list(target_traits_gene_ids.items()) diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py index 632c54a..172d555 100644 --- a/gn3/api/heatmaps.py +++ b/gn3/api/heatmaps.py @@ -24,7 +24,7 @@ def clustered_heatmaps(): return jsonify({ "message": "You need to provide at least two trait names." }), 400 - with database_connection(current_app.config["SQL_URI"]) as conn: + with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn: def parse_trait_fullname(trait): name_parts = trait.split(":") return f"{name_parts[1]}::{name_parts[0]}" diff --git a/gn3/api/menu.py b/gn3/api/menu.py index 58b761e..377ac6b 100644 --- a/gn3/api/menu.py +++ b/gn3/api/menu.py @@ -10,5 +10,5 @@ menu = Blueprint("menu", __name__) @menu.route("/generate/json") def generate_json(): """Get the menu in the JSON format""" - with database_connection(current_app.config["SQL_URI"]) as conn: + with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn: return jsonify(gen_dropdown_json(conn)) diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py index 91dc115..3f28f5d 100644 --- a/gn3/api/metadata.py +++ b/gn3/api/metadata.py @@ -5,7 +5,6 @@ from string import Template from pathlib import Path from authlib.jose import jwt - from flask import Blueprint from flask import request from flask import current_app @@ -14,135 +13,20 @@ from gn3.auth.authorisation.errors import AuthorisationError from gn3.db.datasets import (retrieve_metadata, save_metadata, get_history) -from gn3.db.rdf import RDF_PREFIXES from gn3.db.rdf import (query_frame_and_compact, - query_and_compact, - query_and_frame) - - -BASE_CONTEXT = { - "data": "@graph", - "id": "@id", - "type": "@type", - "gnc": "http://genenetwork.org/category/", - "gnt": "http://genenetwork.org/term/", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>", -} - -DATASET_CONTEXT = { - "accessRights": "dct:accessRights", - "accessionId": "dct:identifier", - "acknowledgement": "gnt:hasAcknowledgement", - "altLabel": "skos:altLabel", - "caseInfo": "gnt:hasCaseInfo", - "classifiedUnder": "xkos:classifiedUnder", - "contributors": "dct:creator", - "contactPoint": "dcat:contactPoint", - "created": "dct:created", - "dcat": "http://www.w3.org/ns/dcat#", - "dct": "http://purl.org/dc/terms/", - "description": "dct:description", - "ex": "http://example.org/stuff/1.0/", - "experimentDesignInfo": "gnt:hasExperimentDesignInfo", - "experimentType": "gnt:hasExperimentType", - "foaf": "http://xmlns.com/foaf/0.1/", - "geoSeriesId": "gnt:hasGeoSeriesId", - "gnt": "http://genenetwork.org/term/", - "inbredSet": "gnt:belongsToGroup", - "label": "rdfs:label", - "normalization": "gnt:usesNormalization", - "platformInfo": "gnt:hasPlatformInfo", - "notes": "gnt:hasNotes", - "organization": "foaf:Organization", - "prefLabel": "skos:prefLabel", - "citation": "dct:isReferencedBy", - "GoTree": "gnt:hasGOTreeValue", - "platform": "gnt:usesPlatform", - "processingInfo": "gnt:hasDataProcessingInfo", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "skos": "http://www.w3.org/2004/02/skos/core#", - "specifics": "gnt:hasContentInfo", - "title": "dct:title", - "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", - "tissueInfo": "gnt:hasTissueInfo", - "tissue": "gnt:hasTissue", - "contactWebUrl": "foaf:homepage", - "contactName": "foaf:name", -} - -SEARCH_CONTEXT = { - "pages": "ex:pages", - "hits": "ex:hits", - "result": "ex:result", - "results": "ex:items", - "resultItem": "ex:resultType", - "currentPage": "ex:currentPage", -} - -DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | { - "classifiedUnder": "xkos:classifiedUnder", - "created": "dct:created", - "dct": "http://purl.org/dc/terms/", - "ex": "http://example.org/stuff/1.0/", - "inbredSet": "ex:belongsToInbredSet", - "title": "dct:title", - "name": "rdfs:label", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "type": "@type", - "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", -} + query_and_compact) +from gn3.db.constants import ( + RDF_PREFIXES, BASE_CONTEXT, + DATASET_CONTEXT, + DATASET_SEARCH_CONTEXT, PUBLICATION_CONTEXT, + PHENOTYPE_CONTEXT +) -PUBLICATION_CONTEXT = { - "dct": "http://purl.org/dc/terms/", - "fabio": "http://purl.org/spar/fabio/", - "prism": "http://prismstandard.org/namespaces/basic/2.0/", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "title": "dct:title", - "journal": "fabio:Journal", - "volume": "prism:volume", - "page": "fabio:page", - "creator": "dct:creator", - "abstract": "dct:abstract", - "year": { - "@id": "fabio:hasPublicationYear", - "@type": "xsd:gYear", - }, - "month": { - "@id": "prism:publicationDate", - "@type": "xsd:gMonth" - }, -} +from gn3.api.metadata_api import wiki -PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | { - "skos": "http://www.w3.org/2004/02/skos/core#", - "dcat": "http://www.w3.org/ns/dcat#", - "prism": "http://prismstandard.org/namespaces/basic/2.0/", - "traitName": "skos:altLabel", - "trait": "rdfs:label", - "altName": "rdfs:altLabel", - "description": "dct:description", - "abbreviation": "gnt:abbreviation", - "labCode": "gnt:labCode", - "submitter": "gnt:submitter", - "dataset": "dcat:Distribution", - "contributor": "dct:contributor", - "mean": "gnt:mean", - "locus": "gnt:locus", - "lodScore": "gnt:lodScore", - "references": "dct:isReferencedBy", - "additive": "gnt:additive", - "sequence": "gnt:sequence", - "prefLabel": "skos:prefLabel", - "identifier": "dct:identifier", - "chromosome": "gnt:chr", - "mb": "gnt:mb", - "peakLocation": "gnt:locus", - "species": "gnt:belongsToSpecies", - "group": "gnt:belongsToGroup", -} metadata = Blueprint("metadata", __name__) +metadata.register_blueprint(wiki.wiki_blueprint) @metadata.route("/datasets/<name>", methods=["GET"]) @@ -208,7 +92,7 @@ CONSTRUCT { (Path( current_app.config.get("DATA_DIR") ) / "gn-docs/general/datasets" / - Path(__result.get("id", "")).stem).as_posix() + Path(__result.get("id", "")).stem).as_posix() ) @@ -348,69 +232,6 @@ def edit_dataset(): lambda x: ("Edit successfull", 201) ) -@metadata.route("/datasets/search/<term>", methods=["GET"]) -def search_datasets(term): - """Search datasets""" - args = request.args - page = args.get("page", 0) - page_size = args.get("per-page", 10) - _query = Template(""" -$prefix - -CONSTRUCT { - ex:result rdf:type ex:resultType ; - ex:pages ?pages ; - ex:hits ?hits ; - ex:currentPage $offset ; - ex:items [ - rdfs:label ?label ; - dct:title ?title ; - ex:belongsToInbredSet ?inbredSetName ; - xkos:classifiedUnder ?datasetType ; - ] -} WHERE { -{ - SELECT DISTINCT ?dataset ?label ?inbredSetName ?datasetType ?title - WHERE { - ?dataset rdf:type dcat:Dataset ; - rdfs:label ?label ; - ?datasetPredicate ?datasetObject ; - xkos:classifiedUnder ?inbredSet . - ?inbredSet ^skos:member gnc:Set ; - rdfs:label ?inbredSetName . - ?datasetObject bif:contains "'$term'" . - OPTIONAL { - ?dataset dct:title ?title . - } . - OPTIONAL { - ?classification ^xkos:classifiedUnder ?dataset ; - ^skos:member gnc:DatasetType ; - ?typePredicate ?typeName ; - skos:prefLabel ?datasetType . - } - } ORDER BY ?dataset LIMIT $limit OFFSET $offset -} - -{ - SELECT (COUNT(DISTINCT ?dataset)/$limit+1 AS ?pages) - (COUNT(DISTINCT ?dataset) AS ?hits) WHERE { - ?dataset rdf:type dcat:Dataset ; - ?p ?o . - ?o bif:contains "'$term'" . - } -} - -} -""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page) - _context = { - "@context": BASE_CONTEXT | DATASET_SEARCH_CONTEXT, - "type": "resultItem", - } - return query_frame_and_compact( - _query, _context, - current_app.config.get("SPARQL_ENDPOINT") - ) - @metadata.route("/publications/<name>", methods=["GET"]) def publications(name): @@ -436,65 +257,6 @@ CONSTRUCT { ) -@metadata.route("/publications/search/<term>", methods=["GET"]) -def search_publications(term): - """Search publications""" - args = request.args - page = args.get("page", 0) - page_size = args.get("per-page", 10) - _query = Template(""" -$prefix - -CONSTRUCT { - ex:result rdf:type ex:resultType ; - ex:totalCount ?totalCount ; - ex:currentPage $offset ; - ex:items [ - rdfs:label ?publication ; - dct:title ?title ; - ] -} WHERE { -{ - SELECT ?publication ?title ?pmid WHERE { - ?pub rdf:type fabio:ResearchPaper ; - ?predicate ?object ; - dct:title ?title . - ?object bif:contains "'$term'" . - BIND( STR(?pub) AS ?publication ) . - } ORDER BY ?title LIMIT $limit OFFSET $offset - } -{ - SELECT (COUNT(*)/$limit+1 AS ?totalCount) WHERE { - ?publication rdf:type fabio:ResearchPaper ; - ?predicate ?object . - ?object bif:contains "'$term'" . - } -} -} -""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page) - _context = { - "@context": BASE_CONTEXT | SEARCH_CONTEXT | { - "dct": "http://purl.org/dc/terms/", - "ex": "http://example.org/stuff/1.0/", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "fabio": "http://purl.org/spar/fabio/", - "title": "dct:title", - "pubmed": "fabio:hasPubMedId", - "currentPage": "ex:currentPage", - "url": "rdfs:label", - }, - "type": "resultItem", - "paper": { - "@type": "fabio:ResearchPaper", - "@container": "@index" - } - } - return query_and_frame( - _query, _context, - current_app.config.get("SPARQL_ENDPOINT") - ) - - @metadata.route("/phenotypes/<name>", methods=["GET"]) @metadata.route("/phenotypes/<group>/<name>", methods=["GET"]) def phenotypes(name, group=None): @@ -630,97 +392,6 @@ CONSTRUCT { ) -@metadata.route("/genewikis/gn/<symbol>", methods=["GET"]) -def get_gn_genewiki_entries(symbol): - """Fetch the GN and NCBI GeneRIF entries""" - args = request.args - page = args.get("page", 0) - page_size = args.get("per-page", 10) - _query = Template(""" -$prefix - -CONSTRUCT { - ?symbol ex:entries [ - rdfs:comment ?comment ; - ex:species ?species_ ; - dct:created ?createTime ; - dct:references ?pmids ; - dct:creator ?creator ; - gnt:belongsToCategory ?categories ; - ] . - ?symbol rdf:type gnc:GNWikiEntry ; - ex:totalCount ?totalCount ; - ex:currentPage $offset . -} WHERE { -{ - SELECT ?symbol ?comment - (GROUP_CONCAT(DISTINCT ?speciesName; SEPARATOR='; ') AS ?species_) - ?createTime ?creator - (GROUP_CONCAT(DISTINCT ?pubmed; SEPARATOR='; ') AS ?pmids) - (GROUP_CONCAT(DISTINCT ?category; SEPARATOR='; ') AS ?categories) - WHERE { - ?symbol rdfs:label ?label ; - rdfs:comment _:entry . - ?label bif:contains "'$symbol'" . - _:entry rdf:type gnc:GNWikiEntry ; - rdfs:comment ?comment . - OPTIONAL { - ?species ^xkos:classifiedUnder _:entry ; - ^skos:member gnc:Species ; - skos:prefLabel ?speciesName . - } . - OPTIONAL { _:entry dct:created ?createTime . } . - OPTIONAL { _:entry dct:references ?pubmed . } . - OPTIONAL { - ?investigator foaf:name ?creator ; - ^dct:creator _:entry . - } . - OPTIONAL { _:entry gnt:belongsToCategory ?category . } . - } GROUP BY ?comment ?symbol ?createTime - ?creator ORDER BY ?createTime LIMIT $limit OFFSET $offset -} - -{ - SELECT (COUNT(DISTINCT ?comment)/$limit+1 AS ?totalCount) WHERE { - ?symbol rdfs:comment _:entry ; - rdfs:label ?label . - _:entry rdfs:comment ?comment ; - rdf:type gnc:GNWikiEntry . - ?label bif:contains "'$symbol'" . - } -} -} -""").substitute(prefix=RDF_PREFIXES, symbol=symbol, - limit=page_size, offset=page) - _context = { - "@context": BASE_CONTEXT | { - "ex": "http://example.org/stuff/1.0/", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "gnt": "http://genenetwork.org/term/", - "gnc": "http://genenetwork.org/category/", - "dct": "http://purl.org/dc/terms/", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "entries": "ex:entries", - "comment": "rdfs:comment", - "species": "ex:species", - "category": 'gnt:belongsToCategory', - "author": "dct:creator", - "pubmed": "dct:references", - "currentPage": "ex:currentPage", - "pages": "ex:totalCount", - "created": { - "@id": "dct:created", - "@type": "xsd:datetime" - }, - }, - "type": "gnc:GNWikiEntry" - } - return query_frame_and_compact( - _query, _context, - current_app.config.get("SPARQL_ENDPOINT") - ) - - @metadata.route("/genewikis/ncbi/<symbol>", methods=["GET"]) def get_ncbi_genewiki_entries(symbol): """Fetch the NCBI GeneRIF entries""" diff --git a/gn3/api/metadata_api/wiki.py b/gn3/api/metadata_api/wiki.py new file mode 100644 index 0000000..a4abef6 --- /dev/null +++ b/gn3/api/metadata_api/wiki.py @@ -0,0 +1,119 @@ +"""API for accessing/editting wiki metadata""" + +import datetime +from typing import Any, Dict +from flask import Blueprint, request, jsonify, current_app, make_response +from gn3 import db_utils +from gn3.db import wiki +from gn3.db.rdf import (query_frame_and_compact, + get_wiki_entries_by_symbol) + + +wiki_blueprint = Blueprint("wiki", __name__, url_prefix="wiki") + + +@wiki_blueprint.route("/<int:comment_id>/edit", methods=["POST"]) +def edit_wiki(comment_id: int): + """Edit wiki comment. This is achieved by adding another entry with a new VersionId""" + # FIXME: attempt to check and fix for types here with relevant errors + payload: Dict[str, Any] = request.json # type: ignore + pubmed_ids = [str(x) for x in payload.get("pubmed_ids", [])] + + insert_dict = { + "Id": comment_id, + "symbol": payload["symbol"], + "PubMed_ID": " ".join(pubmed_ids), + "comment": payload["comment"], + "email": payload["email"], + "createtime": datetime.datetime.now(datetime.timezone.utc).strftime( + "%Y-%m-%d %H:%M" + ), + "user_ip": request.environ.get("HTTP_X_REAL_IP", request.remote_addr), + "weburl": payload.get("web_url"), + "initial": payload.get("initial"), + "reason": payload["reason"], + } + + insert_query = """ + INSERT INTO GeneRIF (Id, versionId, symbol, PubMed_ID, SpeciesID, comment, + email, createtime, user_ip, weburl, initial, reason) + VALUES (%(Id)s, %(versionId)s, %(symbol)s, %(PubMed_ID)s, %(SpeciesID)s, %(comment)s, %(email)s, %(createtime)s, %(user_ip)s, %(weburl)s, %(initial)s, %(reason)s) + """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + cursor = conn.cursor() + try: + category_ids = wiki.get_categories_ids( + cursor, payload["categories"]) + species_id = wiki.get_species_id(cursor, payload["species"]) + next_version = wiki.get_next_comment_version(cursor, comment_id) + except wiki.MissingDBDataException as missing_exc: + return jsonify(error=f"Error editting wiki entry, {missing_exc}"), 500 + insert_dict["SpeciesID"] = species_id + insert_dict["versionId"] = next_version + current_app.logger.debug(f"Running query: {insert_query}") + cursor.execute(insert_query, insert_dict) + category_addition_query = """ + INSERT INTO GeneRIFXRef (GeneRIFId, versionId, GeneCategoryId) + VALUES (%s, %s, %s) + """ + + for cat_id in category_ids: + current_app.logger.debug( + f"Running query: {category_addition_query}") + cursor.execute( + category_addition_query, (comment_id, + insert_dict["versionId"], cat_id) + ) + return jsonify({"success": "ok"}) + return jsonify(error="Error editing wiki entry, most likely due to DB error!"), 500 + + +@wiki_blueprint.route("/<string:symbol>", methods=["GET"]) +def get_wiki_entries(symbol: str): + """Fetch wiki entries""" + content_type = request.headers.get("Content-Type") + status_code = 200 + response = get_wiki_entries_by_symbol( + symbol=symbol, + sparql_uri=current_app.config["SPARQL_ENDPOINT"]) + data = response.get("data") + if not data: + data = {} + status_code = 404 + if content_type == "application/ld+json": + payload = make_response(response) + payload.headers["Content-Type"] = "application/ld+json" + return payload, status_code + return jsonify(data), status_code + + +@wiki_blueprint.route("/<int:comment_id>", methods=["GET"]) +def get_wiki(comment_id: int): + """ + Gets latest wiki comments. + + TODO: fetch this from RIF + """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + return jsonify(wiki.get_latest_comment(conn, comment_id)) + return jsonify(error="Error fetching wiki entry, most likely due to DB error!"), 500 + + +@wiki_blueprint.route("/categories", methods=["GET"]) +def get_categories(): + """ Gets list of supported categories for RIF """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + cursor = conn.cursor() + categories_dict = wiki.get_categories(cursor) + return jsonify(categories_dict) + return jsonify(error="Error getting categories, most likely due to DB error!"), 500 + + +@wiki_blueprint.route("/species", methods=["GET"]) +def get_species(): + """ Gets list of all species, contains name and SpeciesName """ + with db_utils.database_connection(current_app.config["SQL_URI"]) as conn: + cursor = conn.cursor() + species_dict = wiki.get_species(cursor) + return jsonify(species_dict) + return jsonify(error="Error getting species, most likely due to DB error!"), 500 diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py index 70ebe12..ae0110d 100644 --- a/gn3/api/rqtl.py +++ b/gn3/api/rqtl.py @@ -25,11 +25,11 @@ run the rqtl_wrapper script and return the results as JSON raise FileNotFoundError # Split kwargs by those with values and boolean ones that just convert to True/False - kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"] + kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control"] boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"] all_kwargs = kwargs + boolean_kwargs - rqtl_kwargs = {"geno": genofile, "pheno": phenofile} + rqtl_kwargs = {"geno": genofile, "pheno": phenofile, "outdir": current_app.config.get("TMPDIR")} rqtl_bool_kwargs = [] for kwarg in all_kwargs: if kwarg in request.form: diff --git a/gn3/api/search.py b/gn3/api/search.py index c741b15..f696428 100644 --- a/gn3/api/search.py +++ b/gn3/api/search.py @@ -194,23 +194,36 @@ def parse_location_field(species_query: xapian.Query, .maybe(xapian.Query.MatchNothing, make_query)) +def parse_boolean_prefixed_field(prefix: str, query: bytes) -> xapian.Query: + """Parse boolean prefixed field and return a xapian query.""" + # For some reason, xapian does not stem boolean prefixed fields + # when the query starts with a capital letter. We need it to stem + # always. Hence this function. + return xapian.Query(prefix + query.decode("utf-8").lower()) + + # pylint: disable=too-many-locals def parse_query(synteny_files_directory: Path, query: str): """Parse search query using GeneNetwork specific field processors.""" queryparser = xapian.QueryParser() queryparser.set_stemmer(xapian.Stem("en")) - queryparser.set_stemming_strategy(queryparser.STEM_SOME) + queryparser.set_stemming_strategy(queryparser.STEM_ALL_Z) species_prefix = "XS" chromosome_prefix = "XC" queryparser.add_boolean_prefix("author", "A") queryparser.add_boolean_prefix("species", species_prefix) - queryparser.add_boolean_prefix("group", "XG") + queryparser.add_boolean_prefix("group", + FieldProcessor(partial(parse_boolean_prefixed_field, "XG"))) queryparser.add_boolean_prefix("tissue", "XI") queryparser.add_boolean_prefix("dataset", "XDS") queryparser.add_boolean_prefix("symbol", "XY") queryparser.add_boolean_prefix("chr", chromosome_prefix) queryparser.add_boolean_prefix("peakchr", "XPC") queryparser.add_prefix("description", "XD") + queryparser.add_prefix("rif", "XRF") + queryparser.add_prefix("wiki", "XWK") + queryparser.add_prefix("RIF", "XRF") + queryparser.add_prefix("WIKI", "XWK") range_prefixes = ["mean", "peak", "position", "peakmb", "additive", "year"] for i, prefix in enumerate(range_prefixes): # Treat position specially since it needs its own field processor. @@ -263,11 +276,13 @@ def search_results(): if page < 1: abort(404, description="Requested page does not exist") results_per_page = args.get("per_page", default=100, type=int) - maximum_results_per_page = 10000 + maximum_results_per_page = 50000 if results_per_page > maximum_results_per_page: abort(400, description="Requested too many search results") - - query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring) + try: + query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring) + except xapian.QueryParserError as err: + return jsonify({"error_type": str(err.get_type()), "error": err.get_msg()}), 400 traits = [] # pylint: disable=invalid-name with xapian_database(current_app.config["XAPIAN_DB_PATH"]) as db: |