-rw-r--r--  gn3/api/correlation.py       |   2
-rw-r--r--  gn3/api/heatmaps.py          |   2
-rw-r--r--  gn3/api/menu.py              |   2
-rw-r--r--  gn3/api/metadata.py          | 349
-rw-r--r--  gn3/api/metadata_api/wiki.py | 119
-rw-r--r--  gn3/api/rqtl.py              |   4
-rw-r--r--  gn3/api/search.py            |  25
-rw-r--r--  gn3/case_attributes.py       |  96
-rw-r--r--  gn3/db/constants.py          | 152
-rw-r--r--  gn3/db/rdf.py                | 126
-rw-r--r--  gn3/db/wiki.py               |  80
-rw-r--r--  gn3/db_utils.py              |   7
-rw-r--r--  gn3/errors.py                |  42
-rw-r--r--  gn3/oauth2/__init__.py       |   1
-rw-r--r--  gn3/oauth2/authorisation.py  |  34
-rw-r--r--  gn3/oauth2/errors.py         |   8
-rw-r--r--  gn3/oauth2/jwks.py           |  36
-rw-r--r--  gn3/settings.py              |   4
-rwxr-xr-x  scripts/index-genenetwork    | 251
-rw-r--r--  scripts/rqtl_wrapper.R       |  16
-rwxr-xr-x  scripts/update_rif_table.py  | 167
21 files changed, 1043 insertions, 480 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index eb4cc7d..c77dd93 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -64,7 +64,7 @@ def compute_lit_corr(species=None, gene_id=None):
     might be needed for actual computing of the correlation results
     """
 
-    with database_connection(current_app.config["SQL_URI"]) as conn:
+    with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn:
         target_traits_gene_ids = request.get_json()
         target_trait_gene_list = list(target_traits_gene_ids.items())
 
diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py
index 632c54a..172d555 100644
--- a/gn3/api/heatmaps.py
+++ b/gn3/api/heatmaps.py
@@ -24,7 +24,7 @@ def clustered_heatmaps():
         return jsonify({
             "message": "You need to provide at least two trait names."
         }), 400
-    with database_connection(current_app.config["SQL_URI"]) as conn:
+    with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn:
         def parse_trait_fullname(trait):
             name_parts = trait.split(":")
             return f"{name_parts[1]}::{name_parts[0]}"
diff --git a/gn3/api/menu.py b/gn3/api/menu.py
index 58b761e..377ac6b 100644
--- a/gn3/api/menu.py
+++ b/gn3/api/menu.py
@@ -10,5 +10,5 @@ menu = Blueprint("menu", __name__)
 @menu.route("/generate/json")
 def generate_json():
     """Get the menu in the JSON format"""
-    with database_connection(current_app.config["SQL_URI"]) as conn:
+    with database_connection(current_app.config["SQL_URI"], logger=current_app.logger) as conn:
         return jsonify(gen_dropdown_json(conn))
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index 91dc115..3f28f5d 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -5,7 +5,6 @@ from string import Template
 from pathlib import Path
 
 from authlib.jose import jwt
-
 from flask import Blueprint
 from flask import request
 from flask import current_app
@@ -14,135 +13,20 @@ from gn3.auth.authorisation.errors import AuthorisationError
 from gn3.db.datasets import (retrieve_metadata,
                              save_metadata,
                              get_history)
-from gn3.db.rdf import RDF_PREFIXES
 from gn3.db.rdf import (query_frame_and_compact,
-                        query_and_compact,
-                        query_and_frame)
-
-
-BASE_CONTEXT = {
-    "data": "@graph",
-    "id": "@id",
-    "type": "@type",
-    "gnc": "http://genenetwork.org/category/",
-    "gnt": "http://genenetwork.org/term/",
-    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
-}
-
-DATASET_CONTEXT = {
-    "accessRights": "dct:accessRights",
-    "accessionId": "dct:identifier",
-    "acknowledgement": "gnt:hasAcknowledgement",
-    "altLabel": "skos:altLabel",
-    "caseInfo": "gnt:hasCaseInfo",
-    "classifiedUnder": "xkos:classifiedUnder",
-    "contributors": "dct:creator",
-    "contactPoint": "dcat:contactPoint",
-    "created":  "dct:created",
-    "dcat": "http://www.w3.org/ns/dcat#",
-    "dct": "http://purl.org/dc/terms/",
-    "description": "dct:description",
-    "ex": "http://example.org/stuff/1.0/",
-    "experimentDesignInfo": "gnt:hasExperimentDesignInfo",
-    "experimentType": "gnt:hasExperimentType",
-    "foaf": "http://xmlns.com/foaf/0.1/",
-    "geoSeriesId": "gnt:hasGeoSeriesId",
-    "gnt": "http://genenetwork.org/term/",
-    "inbredSet": "gnt:belongsToGroup",
-    "label": "rdfs:label",
-    "normalization": "gnt:usesNormalization",
-    "platformInfo": "gnt:hasPlatformInfo",
-    "notes": "gnt:hasNotes",
-    "organization": "foaf:Organization",
-    "prefLabel": "skos:prefLabel",
-    "citation": "dct:isReferencedBy",
-    "GoTree": "gnt:hasGOTreeValue",
-    "platform": "gnt:usesPlatform",
-    "processingInfo": "gnt:hasDataProcessingInfo",
-    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-    "skos": "http://www.w3.org/2004/02/skos/core#",
-    "specifics": "gnt:hasContentInfo",
-    "title": "dct:title",
-    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
-    "tissueInfo": "gnt:hasTissueInfo",
-    "tissue": "gnt:hasTissue",
-    "contactWebUrl": "foaf:homepage",
-    "contactName": "foaf:name",
-}
-
-SEARCH_CONTEXT = {
-    "pages": "ex:pages",
-    "hits": "ex:hits",
-    "result": "ex:result",
-    "results": "ex:items",
-    "resultItem": "ex:resultType",
-    "currentPage": "ex:currentPage",
-}
-
-DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | {
-    "classifiedUnder": "xkos:classifiedUnder",
-    "created":  "dct:created",
-    "dct": "http://purl.org/dc/terms/",
-    "ex": "http://example.org/stuff/1.0/",
-    "inbredSet": "ex:belongsToInbredSet",
-    "title": "dct:title",
-    "name": "rdfs:label",
-    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-    "type": "@type",
-    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
-}
+                        query_and_compact)
+from gn3.db.constants import (
+    RDF_PREFIXES, BASE_CONTEXT,
+    DATASET_CONTEXT,
+    DATASET_SEARCH_CONTEXT, PUBLICATION_CONTEXT,
+    PHENOTYPE_CONTEXT
+)
 
-PUBLICATION_CONTEXT = {
-    "dct": "http://purl.org/dc/terms/",
-    "fabio": "http://purl.org/spar/fabio/",
-    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
-    "xsd": "http://www.w3.org/2001/XMLSchema#",
-    "title": "dct:title",
-    "journal": "fabio:Journal",
-    "volume": "prism:volume",
-    "page": "fabio:page",
-    "creator": "dct:creator",
-    "abstract": "dct:abstract",
-    "year": {
-        "@id": "fabio:hasPublicationYear",
-        "@type": "xsd:gYear",
-    },
-    "month": {
-        "@id": "prism:publicationDate",
-        "@type": "xsd:gMonth"
-    },
-}
+from gn3.api.metadata_api import wiki
 
-PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | {
-    "skos": "http://www.w3.org/2004/02/skos/core#",
-    "dcat": "http://www.w3.org/ns/dcat#",
-    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
-    "traitName": "skos:altLabel",
-    "trait": "rdfs:label",
-    "altName": "rdfs:altLabel",
-    "description": "dct:description",
-    "abbreviation": "gnt:abbreviation",
-    "labCode": "gnt:labCode",
-    "submitter": "gnt:submitter",
-    "dataset": "dcat:Distribution",
-    "contributor": "dct:contributor",
-    "mean": "gnt:mean",
-    "locus": "gnt:locus",
-    "lodScore": "gnt:lodScore",
-    "references": "dct:isReferencedBy",
-    "additive": "gnt:additive",
-    "sequence": "gnt:sequence",
-    "prefLabel": "skos:prefLabel",
-    "identifier": "dct:identifier",
-    "chromosome": "gnt:chr",
-    "mb": "gnt:mb",
-    "peakLocation": "gnt:locus",
-    "species": "gnt:belongsToSpecies",
-    "group": "gnt:belongsToGroup",
-}
 
 metadata = Blueprint("metadata", __name__)
+metadata.register_blueprint(wiki.wiki_blueprint)
 
 
 @metadata.route("/datasets/<name>", methods=["GET"])
@@ -208,7 +92,7 @@ CONSTRUCT {
         (Path(
             current_app.config.get("DATA_DIR")
         ) / "gn-docs/general/datasets" /
-         Path(__result.get("id", "")).stem).as_posix()
+            Path(__result.get("id", "")).stem).as_posix()
     )
 
 
@@ -348,69 +232,6 @@ def edit_dataset():
                 lambda x: ("Edit successfull", 201)
             )
 
-@metadata.route("/datasets/search/<term>", methods=["GET"])
-def search_datasets(term):
-    """Search datasets"""
-    args = request.args
-    page = args.get("page", 0)
-    page_size = args.get("per-page", 10)
-    _query = Template("""
-$prefix
-
-CONSTRUCT {
-        ex:result rdf:type ex:resultType ;
-                  ex:pages ?pages ;
-                  ex:hits ?hits ;
-                  ex:currentPage $offset ;
-                  ex:items [
-                    rdfs:label ?label ;
-                    dct:title ?title ;
-                    ex:belongsToInbredSet ?inbredSetName ;
-                    xkos:classifiedUnder ?datasetType ;
-          ]
-} WHERE {
-{
-        SELECT DISTINCT ?dataset ?label ?inbredSetName ?datasetType ?title
-           WHERE {
-        ?dataset rdf:type dcat:Dataset ;
-                 rdfs:label ?label ;
-                 ?datasetPredicate ?datasetObject ;
-                 xkos:classifiedUnder ?inbredSet .
-        ?inbredSet ^skos:member gnc:Set ;
-                   rdfs:label ?inbredSetName .
-        ?datasetObject bif:contains "'$term'" .
-        OPTIONAL {
-          ?dataset dct:title ?title .
-        } .
-        OPTIONAL {
-          ?classification ^xkos:classifiedUnder ?dataset ;
-                          ^skos:member gnc:DatasetType ;
-                          ?typePredicate ?typeName ;
-                          skos:prefLabel ?datasetType .
-        }
-    } ORDER BY ?dataset LIMIT $limit OFFSET $offset
-}
-
-{
-        SELECT (COUNT(DISTINCT ?dataset)/$limit+1 AS ?pages)
-            (COUNT(DISTINCT ?dataset) AS ?hits) WHERE {
-        ?dataset rdf:type dcat:Dataset ;
-                 ?p ?o .
-        ?o bif:contains "'$term'" .
-        }
-}
-
-}
-""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page)
-    _context = {
-        "@context": BASE_CONTEXT | DATASET_SEARCH_CONTEXT,
-        "type": "resultItem",
-    }
-    return query_frame_and_compact(
-        _query, _context,
-        current_app.config.get("SPARQL_ENDPOINT")
-    )
-
 
 @metadata.route("/publications/<name>", methods=["GET"])
 def publications(name):
@@ -436,65 +257,6 @@ CONSTRUCT {
     )
 
 
-@metadata.route("/publications/search/<term>", methods=["GET"])
-def search_publications(term):
-    """Search publications"""
-    args = request.args
-    page = args.get("page", 0)
-    page_size = args.get("per-page", 10)
-    _query = Template("""
-$prefix
-
-CONSTRUCT {
-        ex:result rdf:type ex:resultType ;
-                  ex:totalCount ?totalCount ;
-                  ex:currentPage $offset ;
-                  ex:items [
-                     rdfs:label ?publication ;
-                     dct:title ?title ;
-        ]
-} WHERE {
-{
-        SELECT ?publication ?title ?pmid WHERE {
-        ?pub rdf:type fabio:ResearchPaper ;
-             ?predicate ?object ;
-             dct:title ?title .
-        ?object bif:contains "'$term'" .
-        BIND( STR(?pub) AS ?publication ) .
-        }  ORDER BY ?title LIMIT $limit OFFSET $offset
-    }
-{
-        SELECT (COUNT(*)/$limit+1 AS ?totalCount) WHERE {
-        ?publication rdf:type fabio:ResearchPaper ;
-                     ?predicate ?object .
-        ?object bif:contains "'$term'" .
-        }
-}
-}
-""").substitute(prefix=RDF_PREFIXES, term=term, limit=page_size, offset=page)
-    _context = {
-        "@context": BASE_CONTEXT | SEARCH_CONTEXT | {
-            "dct": "http://purl.org/dc/terms/",
-            "ex": "http://example.org/stuff/1.0/",
-            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-            "fabio": "http://purl.org/spar/fabio/",
-            "title": "dct:title",
-            "pubmed": "fabio:hasPubMedId",
-            "currentPage": "ex:currentPage",
-            "url": "rdfs:label",
-        },
-        "type": "resultItem",
-        "paper": {
-            "@type": "fabio:ResearchPaper",
-            "@container": "@index"
-        }
-    }
-    return query_and_frame(
-        _query, _context,
-        current_app.config.get("SPARQL_ENDPOINT")
-    )
-
-
 @metadata.route("/phenotypes/<name>", methods=["GET"])
 @metadata.route("/phenotypes/<group>/<name>", methods=["GET"])
 def phenotypes(name, group=None):
@@ -630,97 +392,6 @@ CONSTRUCT {
     )
 
 
-@metadata.route("/genewikis/gn/<symbol>", methods=["GET"])
-def get_gn_genewiki_entries(symbol):
-    """Fetch the GN and NCBI GeneRIF entries"""
-    args = request.args
-    page = args.get("page", 0)
-    page_size = args.get("per-page", 10)
-    _query = Template("""
-$prefix
-
-CONSTRUCT {
-         ?symbol ex:entries [
-              rdfs:comment ?comment ;
-              ex:species ?species_ ;
-              dct:created ?createTime ;
-              dct:references ?pmids ;
-              dct:creator ?creator ;
-              gnt:belongsToCategory ?categories ;
-         ] .
-         ?symbol rdf:type gnc:GNWikiEntry ;
-                 ex:totalCount ?totalCount ;
-                 ex:currentPage $offset .
-} WHERE {
-{
-    SELECT ?symbol ?comment
-        (GROUP_CONCAT(DISTINCT ?speciesName; SEPARATOR='; ') AS ?species_)
-        ?createTime ?creator
-        (GROUP_CONCAT(DISTINCT ?pubmed; SEPARATOR='; ') AS ?pmids)
-        (GROUP_CONCAT(DISTINCT ?category; SEPARATOR='; ') AS ?categories)
-        WHERE {
-        ?symbol rdfs:label ?label ;
-                rdfs:comment _:entry .
-        ?label bif:contains "'$symbol'" .
-        _:entry rdf:type gnc:GNWikiEntry ;
-                rdfs:comment ?comment .
-        OPTIONAL {
-        ?species ^xkos:classifiedUnder _:entry ;
-                 ^skos:member gnc:Species ;
-                 skos:prefLabel ?speciesName .
-        } .
-        OPTIONAL { _:entry dct:created ?createTime . } .
-        OPTIONAL { _:entry dct:references ?pubmed . } .
-        OPTIONAL {
-        ?investigator foaf:name ?creator ;
-                      ^dct:creator _:entry .
-        } .
-        OPTIONAL { _:entry gnt:belongsToCategory ?category . } .
-    } GROUP BY ?comment ?symbol ?createTime
-      ?creator ORDER BY ?createTime LIMIT $limit OFFSET $offset
-}
-
-{
-        SELECT (COUNT(DISTINCT ?comment)/$limit+1 AS ?totalCount) WHERE {
-        ?symbol rdfs:comment _:entry ;
-                rdfs:label ?label .
-        _:entry rdfs:comment ?comment ;
-                rdf:type gnc:GNWikiEntry .
-        ?label bif:contains "'$symbol'" .
-        }
-}
-}
-""").substitute(prefix=RDF_PREFIXES, symbol=symbol,
-                limit=page_size, offset=page)
-    _context = {
-        "@context": BASE_CONTEXT | {
-            "ex": "http://example.org/stuff/1.0/",
-            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-            "gnt": "http://genenetwork.org/term/",
-            "gnc": "http://genenetwork.org/category/",
-            "dct": "http://purl.org/dc/terms/",
-            "xsd": "http://www.w3.org/2001/XMLSchema#",
-            "entries": "ex:entries",
-            "comment": "rdfs:comment",
-            "species": "ex:species",
-            "category": 'gnt:belongsToCategory',
-            "author": "dct:creator",
-            "pubmed": "dct:references",
-            "currentPage": "ex:currentPage",
-            "pages": "ex:totalCount",
-            "created": {
-                "@id": "dct:created",
-                "@type": "xsd:datetime"
-            },
-        },
-        "type": "gnc:GNWikiEntry"
-    }
-    return query_frame_and_compact(
-        _query, _context,
-        current_app.config.get("SPARQL_ENDPOINT")
-    )
-
-
 @metadata.route("/genewikis/ncbi/<symbol>", methods=["GET"])
 def get_ncbi_genewiki_entries(symbol):
     """Fetch the NCBI GeneRIF entries"""
diff --git a/gn3/api/metadata_api/wiki.py b/gn3/api/metadata_api/wiki.py
new file mode 100644
index 0000000..a4abef6
--- /dev/null
+++ b/gn3/api/metadata_api/wiki.py
@@ -0,0 +1,119 @@
+"""API for accessing/editting wiki metadata"""
+
+import datetime
+from typing import Any, Dict
+from flask import Blueprint, request, jsonify, current_app, make_response
+from gn3 import db_utils
+from gn3.db import wiki
+from gn3.db.rdf import (query_frame_and_compact,
+                        get_wiki_entries_by_symbol)
+
+
+wiki_blueprint = Blueprint("wiki", __name__, url_prefix="wiki")
+
+
+@wiki_blueprint.route("/<int:comment_id>/edit", methods=["POST"])
+def edit_wiki(comment_id: int):
+    """Edit wiki comment. This is achieved by adding another entry with a new VersionId"""
+    # FIXME: attempt to check and fix for types here with relevant errors
+    payload: Dict[str, Any] = request.json  # type: ignore
+    pubmed_ids = [str(x) for x in payload.get("pubmed_ids", [])]
+
+    insert_dict = {
+        "Id": comment_id,
+        "symbol": payload["symbol"],
+        "PubMed_ID": " ".join(pubmed_ids),
+        "comment": payload["comment"],
+        "email": payload["email"],
+        "createtime": datetime.datetime.now(datetime.timezone.utc).strftime(
+            "%Y-%m-%d %H:%M"
+        ),
+        "user_ip": request.environ.get("HTTP_X_REAL_IP", request.remote_addr),
+        "weburl": payload.get("web_url"),
+        "initial": payload.get("initial"),
+        "reason": payload["reason"],
+    }
+
+    insert_query = """
+    INSERT INTO GeneRIF (Id, versionId, symbol, PubMed_ID, SpeciesID, comment,
+                         email, createtime, user_ip, weburl, initial, reason)
+    VALUES (%(Id)s, %(versionId)s, %(symbol)s, %(PubMed_ID)s, %(SpeciesID)s, %(comment)s, %(email)s, %(createtime)s, %(user_ip)s, %(weburl)s, %(initial)s, %(reason)s)
+    """
+    with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+        cursor = conn.cursor()
+        try:
+            category_ids = wiki.get_categories_ids(
+                cursor, payload["categories"])
+            species_id = wiki.get_species_id(cursor, payload["species"])
+            next_version = wiki.get_next_comment_version(cursor, comment_id)
+        except wiki.MissingDBDataException as missing_exc:
+            return jsonify(error=f"Error editting wiki entry, {missing_exc}"), 500
+        insert_dict["SpeciesID"] = species_id
+        insert_dict["versionId"] = next_version
+        current_app.logger.debug(f"Running query: {insert_query}")
+        cursor.execute(insert_query, insert_dict)
+        category_addition_query = """
+            INSERT INTO GeneRIFXRef (GeneRIFId, versionId, GeneCategoryId)
+                VALUES (%s, %s, %s)
+            """
+
+        for cat_id in category_ids:
+            current_app.logger.debug(
+                f"Running query: {category_addition_query}")
+            cursor.execute(
+                category_addition_query, (comment_id,
+                                          insert_dict["versionId"], cat_id)
+            )
+        return jsonify({"success": "ok"})
+    return jsonify(error="Error editing wiki entry, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/<string:symbol>", methods=["GET"])
+def get_wiki_entries(symbol: str):
+    """Fetch wiki entries"""
+    content_type = request.headers.get("Content-Type")
+    status_code = 200
+    response = get_wiki_entries_by_symbol(
+        symbol=symbol,
+        sparql_uri=current_app.config["SPARQL_ENDPOINT"])
+    data = response.get("data")
+    if not data:
+        data = {}
+        status_code = 404
+    if content_type == "application/ld+json":
+        payload = make_response(response)
+        payload.headers["Content-Type"] = "application/ld+json"
+        return payload, status_code
+    return jsonify(data), status_code
+
+
+@wiki_blueprint.route("/<int:comment_id>", methods=["GET"])
+def get_wiki(comment_id: int):
+    """
+    Gets the latest version of a wiki comment.
+
+    TODO: fetch this from RIF
+    """
+    with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+        return jsonify(wiki.get_latest_comment(conn, comment_id))
+    return jsonify(error="Error fetching wiki entry, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/categories", methods=["GET"])
+def get_categories():
+    """ Gets list of supported categories for RIF """
+    with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+        cursor = conn.cursor()
+        categories_dict = wiki.get_categories(cursor)
+        return jsonify(categories_dict)
+    return jsonify(error="Error getting categories, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/species", methods=["GET"])
+def get_species():
+    """ Gets list of all species, contains name and SpeciesName """
+    with db_utils.database_connection(current_app.config["SQL_URI"]) as conn:
+        cursor = conn.cursor()
+        species_dict = wiki.get_species(cursor)
+        return jsonify(species_dict)
+    return jsonify(error="Error getting species, most likely due to DB error!"), 500
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index 70ebe12..ae0110d 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -25,11 +25,11 @@ run the rqtl_wrapper script and return the results as JSON
         raise FileNotFoundError
 
     # Split kwargs by those with values and boolean ones that just convert to True/False
-    kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control_marker"]
+    kwargs = ["covarstruct", "model", "method", "nperm", "scale", "control"]
     boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"]
     all_kwargs = kwargs + boolean_kwargs
 
-    rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
+    rqtl_kwargs = {"geno": genofile, "pheno": phenofile, "outdir": current_app.config.get("TMPDIR")}
     rqtl_bool_kwargs = []
     for kwarg in all_kwargs:
         if kwarg in request.form:
diff --git a/gn3/api/search.py b/gn3/api/search.py
index c741b15..f696428 100644
--- a/gn3/api/search.py
+++ b/gn3/api/search.py
@@ -194,23 +194,36 @@ def parse_location_field(species_query: xapian.Query,
             .maybe(xapian.Query.MatchNothing, make_query))
 
 
+def parse_boolean_prefixed_field(prefix: str, query: bytes) -> xapian.Query:
+    """Parse boolean prefixed field and return a xapian query."""
+    # For some reason, xapian does not stem boolean prefixed fields
+    # when the query starts with a capital letter. We need it to stem
+    # always. Hence this function.
+    return xapian.Query(prefix + query.decode("utf-8").lower())
+
+
 # pylint: disable=too-many-locals
 def parse_query(synteny_files_directory: Path, query: str):
     """Parse search query using GeneNetwork specific field processors."""
     queryparser = xapian.QueryParser()
     queryparser.set_stemmer(xapian.Stem("en"))
-    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
+    queryparser.set_stemming_strategy(queryparser.STEM_ALL_Z)
     species_prefix = "XS"
     chromosome_prefix = "XC"
     queryparser.add_boolean_prefix("author", "A")
     queryparser.add_boolean_prefix("species", species_prefix)
-    queryparser.add_boolean_prefix("group", "XG")
+    queryparser.add_boolean_prefix("group",
+                                   FieldProcessor(partial(parse_boolean_prefixed_field, "XG")))
     queryparser.add_boolean_prefix("tissue", "XI")
     queryparser.add_boolean_prefix("dataset", "XDS")
     queryparser.add_boolean_prefix("symbol", "XY")
     queryparser.add_boolean_prefix("chr", chromosome_prefix)
     queryparser.add_boolean_prefix("peakchr", "XPC")
     queryparser.add_prefix("description", "XD")
+    queryparser.add_prefix("rif", "XRF")
+    queryparser.add_prefix("wiki", "XWK")
+    queryparser.add_prefix("RIF", "XRF")
+    queryparser.add_prefix("WIKI", "XWK")
     range_prefixes = ["mean", "peak", "position", "peakmb", "additive", "year"]
     for i, prefix in enumerate(range_prefixes):
         # Treat position specially since it needs its own field processor.
@@ -263,11 +276,13 @@ def search_results():
     if page < 1:
         abort(404, description="Requested page does not exist")
     results_per_page = args.get("per_page", default=100, type=int)
-    maximum_results_per_page = 10000
+    maximum_results_per_page = 50000
     if results_per_page > maximum_results_per_page:
         abort(400, description="Requested too many search results")
-
-    query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring)
+    try:
+        query = parse_query(Path(current_app.config["DATA_DIR"]) / "synteny", querystring)
+    except xapian.QueryParserError as err:
+        return jsonify({"error_type": str(err.get_type()), "error": err.get_msg()}), 400
     traits = []
     # pylint: disable=invalid-name
     with xapian_database(current_app.config["XAPIAN_DB_PATH"]) as db:
diff --git a/gn3/case_attributes.py b/gn3/case_attributes.py
index d973b8e..efc82e9 100644
--- a/gn3/case_attributes.py
+++ b/gn3/case_attributes.py
@@ -26,8 +26,8 @@ from gn3.commands import run_cmd
 
 from gn3.db_utils import Connection, database_connection
 
+from gn3.oauth2.authorisation import require_token
 from gn3.auth.authorisation.errors import AuthorisationError
-from gn3.auth.authorisation.oauth2.resource_server import require_oauth
 
 caseattr = Blueprint("case-attribute", __name__)
 
@@ -61,8 +61,10 @@ class CAJSONEncoder(json.JSONEncoder):
         return json.JSONEncoder.default(self, obj)
 
 def required_access(
-        inbredset_id: int, access_levels: tuple[str, ...]) -> Union[
-            bool, tuple[str, ...]]:
+        token: dict,
+        inbredset_id: int,
+        access_levels: tuple[str, ...]
+) -> Union[bool, tuple[str, ...]]:
     """Check whether the user has the appropriate access"""
     def __species_id__(conn):
         with conn.cursor() as cursor:
@@ -71,19 +73,21 @@ def required_access(
                 (inbredset_id,))
             return cursor.fetchone()[0]
     try:
-        with (require_oauth.acquire("profile resource") as the_token,
-              database_connection(current_app.config["SQL_URI"]) as conn):
+        with database_connection(current_app.config["SQL_URI"]) as conn:
             result = requests.get(
+                # this section fetches the resource ID from the auth server
                 urljoin(current_app.config["AUTH_SERVER_URL"],
                         "auth/resource/inbredset/resource-id"
                         f"/{__species_id__(conn)}/{inbredset_id}"))
             if result.status_code == 200:
                 resource_id = result.json()["resource-id"]
                 auth = requests.post(
+                    # this section fetches the authorisations/privileges that
+                    # the current user has on the resource we got above
                     urljoin(current_app.config["AUTH_SERVER_URL"],
                             "auth/resource/authorisation"),
                     json={"resource-ids": [resource_id]},
-                    headers={"Authorization": f"Bearer {the_token.access_token}"})
+                    headers={"Authorization": f"Bearer {token['access_token']}"})
                 if auth.status_code == 200:
                     privs = tuple(priv["privilege_id"]
                                   for role in auth.json()[resource_id]["roles"]
@@ -398,14 +402,15 @@ def __apply_deletions__(
         params)
 
 def __apply_diff__(
-        conn: Connection, inbredset_id: int, diff_filename, the_diff) -> None:
+        conn: Connection, auth_token, inbredset_id: int, diff_filename, the_diff) -> None:
     """
     Apply the changes in the diff at `diff_filename` to the data in the database
     if the user has appropriate privileges.
     """
-    required_access(
-        inbredset_id, ("system:inbredset:edit-case-attribute",
-                       "system:inbredset:apply-case-attribute-edit"))
+    required_access(auth_token,
+                    inbredset_id,
+                    ("system:inbredset:edit-case-attribute",
+                     "system:inbredset:apply-case-attribute-edit"))
     diffs = the_diff["diff"]
     with conn.cursor(cursorclass=DictCursor) as cursor:
         # __apply_additions__(cursor, inbredset_id, diffs["Additions"])
@@ -419,6 +424,7 @@ def __apply_diff__(
         os.rename(diff_filename, new_path)
 
 def __reject_diff__(conn: Connection,
+                    auth_token: dict,
                     inbredset_id: int,
                     diff_filename: Path,
                     diff: dict) -> Path:
@@ -426,38 +432,45 @@ def __reject_diff__(conn: Connection,
     Reject the changes in the diff at `diff_filename` to the data in the
     database if the user has appropriate privileges.
     """
-    required_access(
-        inbredset_id, ("system:inbredset:edit-case-attribute",
-                       "system:inbredset:apply-case-attribute-edit"))
+    required_access(auth_token,
+                    inbredset_id,
+                    ("system:inbredset:edit-case-attribute",
+                     "system:inbredset:apply-case-attribute-edit"))
     __save_diff__(conn, diff, EditStatus.rejected)
     new_path = Path(diff_filename.parent, f"{diff_filename.stem}-rejected{diff_filename.suffix}")
     os.rename(diff_filename, new_path)
     return diff_filename
 
 @caseattr.route("/<int:inbredset_id>/add", methods=["POST"])
-def add_case_attributes(inbredset_id: int) -> Response:
+@require_token
+def add_case_attributes(inbredset_id: int, auth_token=None) -> Response:
     """Add a new case attribute for `InbredSetId`."""
-    required_access(inbredset_id, ("system:inbredset:create-case-attribute",))
-    with (require_oauth.acquire("profile resource") as the_token,      # pylint: disable=[unused-variable]
-          database_connection(current_app.config["SQL_URI"]) as conn): # pylint: disable=[unused-variable]
+    required_access(
+        auth_token, inbredset_id, ("system:inbredset:create-case-attribute",))
+    with database_connection(current_app.config["SQL_URI"]) as conn: # pylint: disable=[unused-variable]
         raise NotImplementedError
 
 @caseattr.route("/<int:inbredset_id>/delete", methods=["POST"])
-def delete_case_attributes(inbredset_id: int) -> Response:
+@require_token
+def delete_case_attributes(inbredset_id: int, auth_token=None) -> Response:
     """Delete a case attribute from `InbredSetId`."""
-    required_access(inbredset_id, ("system:inbredset:delete-case-attribute",))
-    with (require_oauth.acquire("profile resource") as the_token,      # pylint: disable=[unused-variable]
-          database_connection(current_app.config["SQL_URI"]) as conn): # pylint: disable=[unused-variable]
+    required_access(
+        auth_token, inbredset_id, ("system:inbredset:delete-case-attribute",))
+    with database_connection(current_app.config["SQL_URI"]) as conn: # pylint: disable=[unused-variable]
         raise NotImplementedError
 
 @caseattr.route("/<int:inbredset_id>/edit", methods=["POST"])
-def edit_case_attributes(inbredset_id: int) -> Response:
-    """Edit the case attributes for `InbredSetId` based on data received."""
-    with (require_oauth.acquire("profile resource") as the_token,
-          database_connection(current_app.config["SQL_URI"]) as conn):
-        required_access(inbredset_id,
+@require_token
+def edit_case_attributes(inbredset_id: int, auth_token = None) -> Response:
+    """Edit the case attributes for `InbredSetId` based on data received.
+
+    :inbredset_id: Identifier for the population that the case attribute belongs to
+    :auth_token: A validated JWT from the auth server
+    """
+    with database_connection(current_app.config["SQL_URI"]) as conn:
+        required_access(auth_token,
+                        inbredset_id,
                         ("system:inbredset:edit-case-attribute",))
-        user = the_token.user
         fieldnames = tuple(["Strain"] + sorted(
             attr["Name"] for attr in
             __case_attribute_labels_by_inbred_set__(conn, inbredset_id)))
@@ -465,7 +478,7 @@ def edit_case_attributes(inbredset_id: int) -> Response:
             diff_filename = __queue_diff__(
                 conn, {
                     "inbredset_id": inbredset_id,
-                    "user_id": str(user.user_id),
+                    "user_id": auth_token["jwt"]["sub"],
                     "fieldnames": fieldnames,
                     "diff": __compute_diff__(
                         fieldnames,
@@ -488,8 +501,11 @@ def edit_case_attributes(inbredset_id: int) -> Response:
             return response
 
         try:
-            __apply_diff__(
-                conn, inbredset_id, diff_filename, __load_diff__(diff_filename))
+            __apply_diff__(conn,
+                           auth_token,
+                           inbredset_id,
+                           diff_filename,
+                           __load_diff__(diff_filename))
             return jsonify({
                 "diff-status": "applied",
                 "message": ("The changes to the case-attributes have been "
@@ -555,37 +571,45 @@ def list_diffs(inbredset_id: int) -> Response:
     return resp
 
 @caseattr.route("/approve/<path:filename>", methods=["POST"])
-def approve_case_attributes_diff(filename: str) -> Response:
+@require_token
+def approve_case_attributes_diff(filename: str, auth_token = None) -> Response:
     """Approve the changes to the case attributes in the diff."""
     diff_dir = Path(current_app.config["TMPDIR"], CATTR_DIFFS_DIR)
     diff_filename = Path(diff_dir, filename)
     the_diff = __load_diff__(diff_filename)
     with database_connection(current_app.config["SQL_URI"]) as conn:
-        __apply_diff__(conn, the_diff["inbredset_id"], diff_filename, the_diff)
+        __apply_diff__(conn, auth_token, the_diff["inbredset_id"], diff_filename, the_diff)
         return jsonify({
             "message": "Applied the diff successfully.",
             "diff_filename": diff_filename.name
         })
 
 @caseattr.route("/reject/<path:filename>", methods=["POST"])
-def reject_case_attributes_diff(filename: str) -> Response:
+@require_token
+def reject_case_attributes_diff(filename: str, auth_token=None) -> Response:
     """Reject the changes to the case attributes in the diff."""
     diff_dir = Path(current_app.config["TMPDIR"], CATTR_DIFFS_DIR)
     diff_filename = Path(diff_dir, filename)
     the_diff = __load_diff__(diff_filename)
     with database_connection(current_app.config["SQL_URI"]) as conn:
-        __reject_diff__(conn, the_diff["inbredset_id"], diff_filename, the_diff)
+        __reject_diff__(conn,
+                        auth_token,
+                        the_diff["inbredset_id"],
+                        diff_filename,
+                        the_diff)
         return jsonify({
             "message": "Rejected diff successfully",
             "diff_filename": diff_filename.name
         })
 
 @caseattr.route("/<int:inbredset_id>/diff/<int:diff_id>/view", methods=["GET"])
-def view_diff(inbredset_id: int, diff_id: int) -> Response:
+@require_token
+def view_diff(inbredset_id: int, diff_id: int, auth_token=None) -> Response:
     """View a diff."""
     with (database_connection(current_app.config["SQL_URI"]) as conn,
           conn.cursor(cursorclass=DictCursor) as cursor):
-        required_access(inbredset_id, ("system:inbredset:view-case-attribute",))
+        required_access(
+            auth_token, inbredset_id, ("system:inbredset:view-case-attribute",))
         cursor.execute(
             "SELECT * FROM caseattributes_audit WHERE id=%s",
             (diff_id,))
diff --git a/gn3/db/constants.py b/gn3/db/constants.py
new file mode 100644
index 0000000..45e3bfc
--- /dev/null
+++ b/gn3/db/constants.py
@@ -0,0 +1,152 @@
+"""
+This module contains some constants used in other modules.
+"""
+PREFIXES = {
+    "dcat": "http://www.w3.org/ns/dcat#",
+    "dct": "http://purl.org/dc/terms/",
+    "ex": "http://example.org/stuff/1.0/",
+    "fabio": "http://purl.org/spar/fabio/",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=",
+    "genotype": "http://genenetwork.org/genotype/",
+    "gn": "http://genenetwork.org/id/",
+    "gnc": "http://genenetwork.org/category/",
+    "gnt": "http://genenetwork.org/term/",
+    "owl": "http://www.w3.org/2002/07/owl#",
+    "phenotype": "http://genenetwork.org/phenotype/",
+    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+    "publication": "http://genenetwork.org/publication/",
+    "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/",
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "skos": "http://www.w3.org/2004/02/skos/core#",
+    "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=",
+    "up": "http://purl.uniprot.org/core/",
+    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+}
+
+RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>"
+                          for key, value in PREFIXES.items()])
+
+BASE_CONTEXT = {
+    "data": "@graph",
+    "type": "@type",
+    "gn": "http://genenetwork.org/id/",
+    "gnc": "http://genenetwork.org/category/",
+    "gnt": "http://genenetwork.org/term/",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
+}
+
+DATASET_CONTEXT = {
+    "accessRights": "dct:accessRights",
+    "accessionId": "dct:identifier",
+    "acknowledgement": "gnt:hasAcknowledgement",
+    "altLabel": "skos:altLabel",
+    "caseInfo": "gnt:hasCaseInfo",
+    "classifiedUnder": "xkos:classifiedUnder",
+    "contributors": "dct:creator",
+    "contactPoint": "dcat:contactPoint",
+    "created": "dct:created",
+    "dcat": "http://www.w3.org/ns/dcat#",
+    "dct": "http://purl.org/dc/terms/",
+    "description": "dct:description",
+    "ex": "http://example.org/stuff/1.0/",
+    "experimentDesignInfo": "gnt:hasExperimentDesignInfo",
+    "experimentType": "gnt:hasExperimentType",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "geoSeriesId": "gnt:hasGeoSeriesId",
+    "gnt": "http://genenetwork.org/term/",
+    "inbredSet": "gnt:belongsToGroup",
+    "label": "rdfs:label",
+    "normalization": "gnt:usesNormalization",
+    "platformInfo": "gnt:hasPlatformInfo",
+    "notes": "gnt:hasNotes",
+    "organization": "foaf:Organization",
+    "prefLabel": "skos:prefLabel",
+    "citation": "dct:isReferencedBy",
+    "GoTree": "gnt:hasGOTreeValue",
+    "platform": "gnt:usesPlatform",
+    "processingInfo": "gnt:hasDataProcessingInfo",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "skos": "http://www.w3.org/2004/02/skos/core#",
+    "specifics": "gnt:hasContentInfo",
+    "title": "dct:title",
+    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+    "tissueInfo": "gnt:hasTissueInfo",
+    "tissue": "gnt:hasTissue",
+    "contactWebUrl": "foaf:homepage",
+    "contactName": "foaf:name",
+}
+
+SEARCH_CONTEXT = {
+    "pages": "ex:pages",
+    "hits": "ex:hits",
+    "result": "ex:result",
+    "results": "ex:items",
+    "resultItem": "ex:resultType",
+    "currentPage": "ex:currentPage",
+}
+
+DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | {
+    "classifiedUnder": "xkos:classifiedUnder",
+    "created": "dct:created",
+    "dct": "http://purl.org/dc/terms/",
+    "ex": "http://example.org/stuff/1.0/",
+    "inbredSet": "ex:belongsToInbredSet",
+    "title": "dct:title",
+    "name": "rdfs:label",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "type": "@type",
+    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+}
+
+PUBLICATION_CONTEXT = {
+    "dct": "http://purl.org/dc/terms/",
+    "fabio": "http://purl.org/spar/fabio/",
+    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "title": "dct:title",
+    "journal": "fabio:Journal",
+    "volume": "prism:volume",
+    "page": "fabio:page",
+    "creator": "dct:creator",
+    "abstract": "dct:abstract",
+    "year": {
+        "@id": "fabio:hasPublicationYear",
+        "@type": "xsd:gYear",
+    },
+    "month": {
+        "@id": "prism:publicationDate",
+        "@type": "xsd:gMonth"
+    },
+}
+
+PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | {
+    "skos": "http://www.w3.org/2004/02/skos/core#",
+    "dcat": "http://www.w3.org/ns/dcat#",
+    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+    "traitName": "skos:altLabel",
+    "trait": "rdfs:label",
+    "altName": "rdfs:altLabel",
+    "description": "dct:description",
+    "abbreviation": "gnt:abbreviation",
+    "labCode": "gnt:labCode",
+    "submitter": "gnt:submitter",
+    "dataset": "dcat:Distribution",
+    "contributor": "dct:contributor",
+    "mean": "gnt:mean",
+    "locus": "gnt:locus",
+    "lodScore": "gnt:lodScore",
+    "references": "dct:isReferencedBy",
+    "additive": "gnt:additive",
+    "sequence": "gnt:sequence",
+    "prefLabel": "skos:prefLabel",
+    "identifier": "dct:identifier",
+    "chromosome": "gnt:chr",
+    "mb": "gnt:mb",
+    "peakLocation": "gnt:locus",
+    "species": "gnt:belongsToSpecies",
+    "group": "gnt:belongsToGroup",
+}
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index eb4014a..5a95683 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -4,39 +4,12 @@ This module is a collection of functions that handle SPARQL queries.
 
 """
 import json
-
+from string import Template
 from SPARQLWrapper import SPARQLWrapper
 from pyld import jsonld  # type: ignore
-
-
-PREFIXES = {
-    "dcat": "http://www.w3.org/ns/dcat#",
-    "dct": "http://purl.org/dc/terms/",
-    "ex": "http://example.org/stuff/1.0/",
-    "fabio": "http://purl.org/spar/fabio/",
-    "foaf": "http://xmlns.com/foaf/0.1/",
-    "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=",
-    "genotype": "http://genenetwork.org/genotype/",
-    "gn": "http://genenetwork.org/id/",
-    "gnc": "http://genenetwork.org/category/",
-    "gnt": "http://genenetwork.org/term/",
-    "owl": "http://www.w3.org/2002/07/owl#",
-    "phenotype": "http://genenetwork.org/phenotype/",
-    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
-    "publication": "http://genenetwork.org/publication/",
-    "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/",
-    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
-    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-    "skos": "http://www.w3.org/2004/02/skos/core#",
-    "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=",
-    "up": "http://purl.uniprot.org/core/",
-    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
-    "xsd": "http://www.w3.org/2001/XMLSchema#",
-}
-
-
-RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>"
-                          for key, value in PREFIXES.items()])
+from gn3.db.constants import (
+    RDF_PREFIXES, BASE_CONTEXT
+)
 
 
 def sparql_construct_query(query: str, endpoint: str) -> dict:
@@ -51,22 +24,101 @@ def sparql_construct_query(query: str, endpoint: str) -> dict:
 def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict:
     """Frame and then compact the results given a context"""
     results = sparql_construct_query(query, endpoint)
-    if not results:
-        return {}
     return jsonld.compact(jsonld.frame(results, context), context)
 
 
 def query_and_compact(query: str, context: dict, endpoint: str) -> dict:
     """Compact the results given a context"""
     results = sparql_construct_query(query, endpoint)
-    if not results:
-        return {}
     return jsonld.compact(results, context)
 
 
 def query_and_frame(query: str, context: dict, endpoint: str) -> dict:
     """Frame the results given a context"""
     results = sparql_construct_query(query, endpoint)
-    if not results:
-        return {}
     return jsonld.frame(results, context)
+
+
+def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
+    """Fetch all the Wiki entries using the symbol"""
+    # This query uses a sub-query to fetch the latest comment by the
+    # version id.
+    query = Template("""
+$prefix
+
+CONSTRUCT {
+    ?uid rdfs:label ?symbolName;
+         gnt:reason ?reason ;
+         gnt:species ?species ;
+         dct:references ?pmid ;
+         foaf:homepage ?weburl ;
+         rdfs:comment ?comment ;
+         foaf:mbox ?email ;
+         gnt:initial ?usercode ;
+         gnt:belongsToCategory ?category ;
+         gnt:hasVersion ?versionId ;
+         dct:created ?created ;
+         dct:identifier ?identifier .
+} WHERE {
+    ?symbolId rdfs:label ?symbolName .
+    ?uid rdfs:comment ?comment ;
+         gnt:symbol ?symbolId ;
+         rdf:type gnc:GNWikiEntry ;
+         dct:created ?createTime .
+    FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+    {
+        SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
+            ?symbolId rdfs:label ?symbolName .
+            ?uid dct:identifier ?id_ ;
+                 dct:hasVersion ?vers ;
+                 dct:identifier ?id_ ;
+                 gnt:symbol ?symbolId .
+            FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+        }
+    }
+    ?uid dct:hasVersion ?max ;
+         dct:identifier ?id_ .
+    OPTIONAL { ?uid gnt:reason ?reason } .
+    OPTIONAL {
+        ?uid gnt:belongsToSpecies ?speciesId .
+        ?speciesId gnt:shortName ?species .
+    } .
+    OPTIONAL { ?uid dct:references ?pubmedId . } .
+    OPTIONAL { ?uid foaf:homepage ?weburl . } .
+    OPTIONAL { ?uid gnt:initial ?usercode . } .
+    OPTIONAL { ?uid gnt:mbox ?email . } .
+    OPTIONAL { ?uid gnt:belongsToCategory ?category . } .
+    BIND (str(?version) AS ?versionId) .
+    BIND (str(?id_) AS ?identifier) .
+    BIND (str(?pubmedId) AS ?pmid) .
+    BIND (str(?createTime) AS ?created) .
+}
+""").substitute(prefix=RDF_PREFIXES, symbol=symbol,)
+    context = BASE_CONTEXT | {
+        "foaf": "http://xmlns.com/foaf/0.1/",
+        "dct": "http://purl.org/dc/terms/",
+        "categories": "gnt:belongsToCategory",
+        "web_url": "foaf:homepage",
+        "version": "gnt:hasVersion",
+        "symbol": "rdfs:label",
+        "reason": "gnt:reason",
+        "species": "gnt:species",
+        "pubmed_id": "dct:references",
+        "email": "foaf:mbox",
+        "initial": "gnt:initial",
+        "comment": "rdfs:comment",
+        "created": "dct:created",
+        "id": "dct:identifier",
+        # This points to the RDF Node which is the unique identifier
+        # for this triplet.  It's constructed using the comment-id and
+        # the comment-versionId
+        "wiki_identifier": "@id",
+    }
+    results = query_frame_and_compact(
+        query, context,
+        sparql_uri
+    )
+    data = results.get("data")
+    if not data:
+        return results
+    return results
diff --git a/gn3/db/wiki.py b/gn3/db/wiki.py
new file mode 100644
index 0000000..abb1644
--- /dev/null
+++ b/gn3/db/wiki.py
@@ -0,0 +1,80 @@
+"""Helper functions to access wiki entries"""
+
+from typing import Dict, List
+
+from MySQLdb.cursors import DictCursor
+
+
+class MissingDBDataException(Exception):
+    """Error due to DB missing some data"""
+
+
+def get_latest_comment(connection, comment_id: int) -> dict:
+    """ Latest comment is one with the highest versionId """
+    cursor = connection.cursor(DictCursor)
+    query = """ SELECT versionId AS version, symbol, PubMed_ID AS pubmed_ids, sp.Name AS species,
+        comment, email, weburl, initial, reason
+        FROM `GeneRIF` gr
+		INNER JOIN Species sp USING(SpeciesId)
+		WHERE gr.Id = %s
+		ORDER BY versionId DESC LIMIT 1;
+    """
+    cursor.execute(query, (comment_id,))
+    result = cursor.fetchone()
+    result["pubmed_ids"] = [x.strip() for x in result["pubmed_ids"].split()]
+    categories_query = """
+        SELECT grx.GeneRIFId, grx.versionId, gc.Name FROM GeneRIFXRef grx
+                INNER JOIN GeneCategory gc ON grx.GeneCategoryId=gc.Id
+                WHERE GeneRIFId = %s AND versionId=%s;
+    """
+
+    cursor.execute(categories_query, (comment_id, result["version"]))
+    categories = cursor.fetchall()
+    result["categories"] = [x["Name"] for x in categories]
+    return result
+
+
+def get_species_id(cursor, species_name: str) -> int:
+    """Find species id given species `Name`"""
+    cursor.execute("SELECT SpeciesID from Species  WHERE Name = %s", (species_name,))
+    species_ids = cursor.fetchall()
+    if len(species_ids) != 1:
+        raise MissingDBDataException(
+            f"expected 1 species with Name={species_name} but found {len(species_ids)}!"
+        )
+    return species_ids[0][0]
+
+
+def get_next_comment_version(cursor, comment_id: int) -> int:
+    """Find the version to add, usually latest_version + 1"""
+    cursor.execute(
+        "SELECT MAX(versionId) as version_id from GeneRIF WHERE Id = %s", (comment_id,)
+    )
+    latest_version = cursor.fetchone()[0]
+    if latest_version is None:
+        raise MissingDBDataException(f"No comment found with comment_id={comment_id}")
+    return latest_version + 1
+
+
+def get_categories_ids(cursor, categories: List[str]) -> List[int]:
+    """Get the categories_ids from a list of category strings"""
+    dict_cats = get_categories(cursor)
+    category_ids = []
+    for category in set(categories):
+        cat_id = dict_cats.get(category.strip())
+        if cat_id is None:
+            raise MissingDBDataException(f"Category with Name={category} not found")
+        category_ids.append(cat_id)
+    return category_ids
+
+def get_categories(cursor) -> Dict[str, int]:
+    cursor.execute("SELECT Name, Id from GeneCategory")
+    raw_categories = cursor.fetchall()
+    dict_cats = dict(raw_categories)
+    return dict_cats
+
+def get_species(cursor) -> Dict[str, str]:
+    cursor.execute("SELECT Name, SpeciesName from Species")
+    raw_species = cursor.fetchall()
+    dict_cats = dict(raw_species)
+    return dict_cats
diff --git a/gn3/db_utils.py b/gn3/db_utils.py
index e4dc81f..0d9bd0a 100644
--- a/gn3/db_utils.py
+++ b/gn3/db_utils.py
@@ -1,11 +1,15 @@
 """module contains all db related stuff"""
 import contextlib
+import logging
 from typing import Any, Iterator, Protocol, Tuple
 from urllib.parse import urlparse
 import MySQLdb as mdb
 import xapian
 
 
+LOGGER = logging.getLogger(__file__)
+
+
 def parse_db_url(sql_uri: str) -> Tuple:
     """function to parse SQL_URI env variable note:there\
     is a default value for SQL_URI so a tuple result is\
@@ -24,7 +28,7 @@ class Connection(Protocol):
 
 
 @contextlib.contextmanager
-def database_connection(sql_uri) -> Iterator[Connection]:
+def database_connection(sql_uri: str, logger: logging.Logger = LOGGER) -> Iterator[Connection]:
     """Connect to MySQL database."""
     host, user, passwd, db_name, port = parse_db_url(sql_uri)
     connection = mdb.connect(db=db_name,
@@ -35,6 +39,7 @@ def database_connection(sql_uri) -> Iterator[Connection]:
     try:
         yield connection
     except mdb.Error as _mbde:
+        logger.error("DB error encountered", exc_info=True)
         connection.rollback()
     finally:
         connection.commit()
diff --git a/gn3/errors.py b/gn3/errors.py
index c53604f..ec7a554 100644
--- a/gn3/errors.py
+++ b/gn3/errors.py
@@ -15,6 +15,7 @@ from werkzeug.exceptions import NotFound
 from authlib.oauth2.rfc6749.errors import OAuth2Error
 from flask import Flask, jsonify, Response, current_app
 
+from gn3.oauth2 import errors as oautherrors
 from gn3.auth.authorisation.errors import AuthorisationError
 from  gn3.llms.errors import LLMError
 
@@ -28,6 +29,7 @@ def add_trace(exc: Exception, jsonmsg: dict) -> dict:
 
 def page_not_found(pnf):
     """Generic 404 handler."""
+    current_app.logger.error("Handling 404 errors", exc_info=True)
     return jsonify(add_trace(pnf, {
         "error": pnf.name,
         "error_description": pnf.description
@@ -36,6 +38,7 @@ def page_not_found(pnf):
 
 def internal_server_error(pnf):
     """Generic 404 handler."""
+    current_app.logger.error("Handling internal server errors", exc_info=True)
     return jsonify(add_trace(pnf, {
         "error": pnf.name,
         "error_description": pnf.description
@@ -44,15 +47,16 @@ def internal_server_error(pnf):
 
 def url_server_error(pnf):
     """Handler for an exception with a url connection."""
+    current_app.logger.error("Handling url server errors", exc_info=True)
     return jsonify(add_trace(pnf, {
         "error": f"URLLib Error no: {pnf.reason.errno}",
         "error_description": pnf.reason.strerror,
-    }))
+    })), 500
 
 
 def handle_authorisation_error(exc: AuthorisationError):
     """Handle AuthorisationError if not handled anywhere else."""
-    current_app.logger.error(exc)
+    current_app.logger.error("Handling external auth errors", exc_info=True)
     return jsonify(add_trace(exc, {
         "error": type(exc).__name__,
         "error_description": " :: ".join(exc.args)
@@ -61,7 +65,7 @@ def handle_authorisation_error(exc: AuthorisationError):
 
 def handle_oauth2_errors(exc: OAuth2Error):
     """Handle OAuth2Error if not handled anywhere else."""
-    current_app.logger.error(exc)
+    current_app.logger.error("Handling external oauth2 errors", exc_info=True)
     return jsonify(add_trace(exc, {
         "error": exc.error,
         "error_description": exc.description,
@@ -70,7 +74,7 @@ def handle_oauth2_errors(exc: OAuth2Error):
 
 def handle_sqlite3_errors(exc: OperationalError):
     """Handle sqlite3 errors if not handled anywhere else."""
-    current_app.logger.error(exc)
+    current_app.logger.error("Handling sqlite3 errors", exc_info=True)
     return jsonify({
         "error": "DatabaseError",
         "error_description": exc.args[0],
@@ -78,24 +82,23 @@ def handle_sqlite3_errors(exc: OperationalError):
 
 
 def handle_sparql_errors(exc):
-    """Handle sqlite3 errors if not handled anywhere else."""
-    current_app.logger.error(exc)
-    __code = {
-        EndPointInternalError: 500,
-        EndPointNotFound: 400,
-        QueryBadFormed: 400,
-        Unauthorized: 401,
-        URITooLong: 414,
+    """Handle sparql/virtuoso errors if not handled anywhere else."""
+    current_app.logger.error("Handling sparql errors", exc_info=True)
+    code = {
+        "EndPointInternalError": 500,
+        "EndPointNotFound": 404,
+        "QueryBadFormed": 400,
+        "Unauthorized": 401,
+        "URITooLong": 414,
     }
     return jsonify({
         "error": exc.msg,
-        "error_description": str(exc),
-    }), __code.get(exc)
+    }), code.get(exc.__class__.__name__)
 
 
 def handle_generic(exc: Exception) -> Response:
     """Handle generic exception."""
-    current_app.logger.error(exc)
+    current_app.logger.error("Handling generic errors", exc_info=True)
     resp = jsonify({
         "error": type(exc).__name__,
         "error_description": (
@@ -106,6 +109,15 @@ def handle_generic(exc: Exception) -> Response:
     return resp
 
 
+def handle_local_authorisation_errors(exc: oautherrors.AuthorisationError):
+    """Handle errors relating to authorisation that are raised locally."""
+    current_app.logger.error("Handling local auth errors", exc_info=True)
+    return jsonify(add_trace(exc, {
+        "error": type(exc).__name__,
+        "error_description": " ".join(exc.args)
+    })), 400
+
+
 def handle_llm_error(exc: Exception) -> Response:
     """ Handle llm erros if not handled  anywhere else. """
     current_app.logger.error(exc)
diff --git a/gn3/oauth2/__init__.py b/gn3/oauth2/__init__.py
new file mode 100644
index 0000000..8001d34
--- /dev/null
+++ b/gn3/oauth2/__init__.py
@@ -0,0 +1 @@
+"""Package to handle OAuth2 authorisation and other issues."""
diff --git a/gn3/oauth2/authorisation.py b/gn3/oauth2/authorisation.py
new file mode 100644
index 0000000..b2dd1ae
--- /dev/null
+++ b/gn3/oauth2/authorisation.py
@@ -0,0 +1,34 @@
+"""Handle authorisation with auth server."""
+from functools import wraps
+
+from flask import request, jsonify, current_app as app
+
+from gn3.oauth2 import jwks
+from gn3.oauth2.errors import TokenValidationError
+
+
+def require_token(func):
+    """Check for and verify bearer token."""
+    @wraps(func)
+    def __auth__(*args, **kwargs):
+        try:
+            bearer = request.headers.get("Authorization", "")
+            if bearer.startswith("Bearer"):
+                # validate token and return it
+                _extra, token = [item.strip() for item in bearer.split(" ")]
+                _jwt = jwks.validate_token(
+                    token,
+                    jwks.fetch_jwks(app.config["AUTH_SERVER_URL"],
+                                    "auth/public-jwks"))
+                return func(*args, **{**kwargs, "auth_token": {"access_token": token, "jwt": _jwt}})
+            error_message = "We expected a bearer token but did not get one."
+        except TokenValidationError as _tve:
+            app.logger.debug("Token validation failed.", exc_info=True)
+            error_message = "The token was found to be invalid."
+
+        return jsonify({
+            "error": "TokenValidationError",
+            "description": error_message
+        }), 400
+
+    return __auth__
diff --git a/gn3/oauth2/errors.py b/gn3/oauth2/errors.py
new file mode 100644
index 0000000..f8cfd2c
--- /dev/null
+++ b/gn3/oauth2/errors.py
@@ -0,0 +1,8 @@
+"""List of possible errors."""
+
+class AuthorisationError(Exception):
+    """Top-level error class dealing with generic authorisation errors."""
+
+
+class TokenValidationError(AuthorisationError):
+    """Class to indicate that token validation failed."""
diff --git a/gn3/oauth2/jwks.py b/gn3/oauth2/jwks.py
new file mode 100644
index 0000000..8798a3f
--- /dev/null
+++ b/gn3/oauth2/jwks.py
@@ -0,0 +1,36 @@
+"""Utilities dealing with JSON Web Keys (JWK)"""
+from urllib.parse import urljoin
+
+import requests
+from flask import current_app as app
+from authlib.jose.errors import BadSignatureError
+from authlib.jose import KeySet, JsonWebKey, JsonWebToken
+
+from gn3.oauth2.errors import TokenValidationError
+
+
+def fetch_jwks(authserveruri: str, path: str = "auth/public-jwks") -> KeySet:
+    """Fetch the JWKs from a particular URI"""
+    try:
+        response = requests.get(urljoin(authserveruri, path))
+        if response.status_code == 200:
+            return KeySet([
+                JsonWebKey.import_key(key) for key in response.json()["jwks"]])
+    # XXXX: TODO: Catch specific exception we need.
+    # pylint: disable=W0703
+    except Exception as _exc:
+        app.logger.debug("There was an error fetching the JSON Web Keys.",
+                         exc_info=True)
+
+    return KeySet([])
+
+
+def validate_token(token: str, keys: KeySet) -> dict:
+    """Validate the token against the given keys."""
+    for key in keys.keys:
+        try:
+            return JsonWebToken(["RS256"]).decode(token, key=key)
+        except BadSignatureError as _bse:
+            pass
+
+    raise TokenValidationError("No key was found for validation.")
diff --git a/gn3/settings.py b/gn3/settings.py
index acf3619..1e794ff 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -85,7 +85,7 @@ ROUND_TO = 10
 
 MULTIPROCESSOR_PROCS = 6  # Number of processes to spawn
 
-AUTH_SERVER_URL = ""
+AUTH_SERVER_URL = "https://auth.genenetwork.org"
 AUTH_MIGRATIONS = "migrations/auth"
 AUTH_DB = os.environ.get(
     "AUTH_DB", f"{os.environ.get('HOME')}/genenetwork/gn3_files/db/auth.db")
@@ -93,8 +93,6 @@ OAUTH2_SCOPE = (
     "profile", "group", "role", "resource", "user", "masquerade",
     "introspect")
 
-GNQA_DB = os.environ.get(
-    "GNQA_DB", f"{os.environ.get('HOME')}/tmp/gnqa.db")
 
 try:
     # *** SECURITY CONCERN ***
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 1f649cf..2779abc 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -8,21 +8,26 @@ xapian index. This xapian index is later used in providing search
 through the web interface.
 
 """
-
-from collections import deque, namedtuple
+from dataclasses import dataclass
+from collections import deque, namedtuple, Counter
 import contextlib
+import time
+import datetime
 from functools import partial
 import itertools
 import json
 import logging
-from multiprocessing import Lock, Process
+from multiprocessing import Lock, Manager, Process, managers
 import os
 import pathlib
 import resource
+import re
 import shutil
 import sys
+import hashlib
 import tempfile
-from typing import Callable, Generator, Iterable, List
+from typing import Callable, Dict, Generator, Hashable, Iterable, List
+from SPARQLWrapper import SPARQLWrapper, JSON
 
 import MySQLdb
 import click
@@ -33,7 +38,10 @@ import xapian
 from gn3.db_utils import database_connection
 from gn3.monads import query_sql
 
-DOCUMENTS_PER_CHUNK = 100000
+DOCUMENTS_PER_CHUNK = 100_000
+# Running the script in prod consumes ~1GB per process when handling 100_000 documents per chunk.
+# To prevent running out of RAM, we set this as the upper bound for the total number of concurrent processes.
+PROCESS_COUNT_LIMIT = 67
 
 SQLQuery = namedtuple("SQLQuery",
                       ["fields", "tables", "where", "offset", "limit"],
@@ -122,6 +130,38 @@ phenotypes_query = SQLQuery(
      SQLTableClause("LEFT JOIN", "Geno",
                     "PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id")])
 
+WIKI_CACHE_QUERY = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX gnt: <http://genenetwork.org/term/>
+PREFIX gnc: <http://genenetwork.org/category/>
+
+SELECT ?symbolName ?speciesName GROUP_CONCAT(DISTINCT ?comment ; separator=\"\\n\") AS ?comment WHERE {
+    ?symbol rdfs:comment _:node ;
+            rdfs:label ?symbolName .
+_:node rdf:type gnc:GNWikiEntry ;
+       gnt:belongsToSpecies ?species ;
+       rdfs:comment ?comment .
+?species gnt:shortName ?speciesName .
+} GROUP BY ?speciesName ?symbolName
+"""
+
+RIF_CACHE_QUERY = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+PREFIX gnt: <http://genenetwork.org/term/>
+PREFIX gnc: <http://genenetwork.org/category/>
+
+SELECT ?symbolName ?speciesName GROUP_CONCAT(DISTINCT ?comment ; separator=\"\\n\") AS ?comment WHERE {
+    ?symbol rdfs:comment _:node ;
+            rdfs:label ?symbolName .
+_:node rdf:type gnc:NCBIWikiEntry ;
+       gnt:belongsToSpecies ?species ;
+       rdfs:comment ?comment .
+?species gnt:shortName ?speciesName .
+} GROUP BY ?speciesName ?symbolName
+"""
+
 
 def serialize_sql(query: SQLQuery) -> str:
     """Serialize SQLQuery object to a string."""
@@ -168,6 +208,48 @@ def locked_xapian_writable_database(path: pathlib.Path) -> xapian.WritableDataba
         db.close()
 
 
+def build_rdf_cache(sparql_uri: str, query: str, remove_common_words: bool = False):
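+    """Build a {(species, symbol): comment-text} cache from a SPARQL query.
+
+    Punctuation is stripped from the comments.  When remove_common_words is
+    True, the most common short or very frequent words are also dropped to
+    keep the cached text (and the resulting index) smaller.
+    """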
+    cache = {}
+    sparql = SPARQLWrapper(sparql_uri)
+    sparql.setReturnFormat(JSON)
+    sparql.setQuery(query)
+    results = sparql.queryAndConvert()
+    if not isinstance(results, dict):
+        raise TypeError(f"Expected results to be a dict but found {type(results)}")
+    bindings = results["results"]["bindings"]
+    count: Counter[str] = Counter()
+    words_regex = re.compile(r"\w+")
+    for entry in bindings:
+        x = (entry["speciesName"]["value"], entry["symbolName"]["value"],)
+        value = entry["comment"]["value"]
+        value = " ".join(words_regex.findall(value)) # remove punctuation
+        cache[x] = value
+        count.update(Counter(value.lower().strip().split()))
+
+    if not remove_common_words:
+        return cache
+
+    words_to_drop = set()
+    for word, cnt in count.most_common(1000):
+        if len(word) < 4 or cnt > 3000:
+            words_to_drop.add(word)
+    smaller_cache = {}
+    for entry, value in cache.items():
+        new_value = set(word for word in value.lower().split() if word not in words_to_drop)
+        smaller_cache[entry] = " ".join(new_value)
+    return smaller_cache
+
+
+def md5hash_ttl_dir(ttl_dir: pathlib.Path) -> str:
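+    """Return an MD5 digest over all *.ttl files in TTL_DIR.
+
+    Returns "-1" if the directory does not exist."""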
+    if not ttl_dir.exists():
+        return "-1"
+    ttl_hash = hashlib.new("md5")
+    for ttl_file in ttl_dir.glob("*.ttl"):
+        with open(ttl_file, encoding="utf-8") as f_:
+            ttl_hash.update(f_.read().encode())
+    return ttl_hash.hexdigest()
+
+
 # pylint: disable=invalid-name
 def write_document(db: xapian.WritableDatabase, identifier: str,
                    doctype: str, doc: xapian.Document) -> None:
@@ -181,15 +263,23 @@ def write_document(db: xapian.WritableDatabase, identifier: str,
 
 termgenerator = xapian.TermGenerator()
 termgenerator.set_stemmer(xapian.Stem("en"))
+termgenerator.set_stopper_strategy(xapian.TermGenerator.STOP_ALL)
+termgenerator.set_stopper(xapian.SimpleStopper())
 
 def index_text(text: str) -> None:
     """Index text and increase term position."""
     termgenerator.index_text(text)
     termgenerator.increase_termpos()
 
-# pylint: disable=unnecessary-lambda
-index_text_without_positions = lambda text: termgenerator.index_text_without_positions(text)
+@curry(3)
+def index_from_dictionary(keys: Hashable, prefix: str, dictionary: dict):
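+    """Index the dictionary entry for KEYS, without positions, under the given term PREFIX."""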
+    entry = dictionary.get(keys)
+    if not entry:
+        return
+    termgenerator.index_text_without_positions(entry, 0, prefix)
+
 
+index_text_without_positions = lambda text: termgenerator.index_text_without_positions(text)
 index_authors = lambda authors: termgenerator.index_text(authors, 0, "A")
 index_species = lambda species: termgenerator.index_text_without_positions(species, 0, "XS")
 index_group = lambda group: termgenerator.index_text_without_positions(group, 0, "XG")
@@ -206,10 +296,17 @@ add_peakmb = lambda doc, peakmb: doc.add_value(3, xapian.sortable_serialise(peak
 add_additive = lambda doc, additive: doc.add_value(4, xapian.sortable_serialise(additive))
 add_year = lambda doc, year: doc.add_value(5, xapian.sortable_serialise(float(year)))
 
+
 # When a child process is forked, it inherits a copy of the memory of
 # its parent. We use this to pass data retrieved from SQL from parent
 # to child. Specifically, we use this global variable.
-data: Iterable
+# This is copy-on-write so make sure child processes don't modify this data
+mysql_data: Iterable
+rif_cache: Iterable
+wiki_cache: Iterable
+
 # We use this lock to ensure that only one process writes its Xapian
 # index to disk at a time.
 xapian_lock = Lock()
@@ -217,7 +314,7 @@ xapian_lock = Lock()
 def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int) -> None:
     """Index genes data into a Xapian index."""
     with locked_xapian_writable_database(xapian_build_directory / f"genes-{chunk_index:04d}") as db:
-        for trait in data:
+        for trait in mysql_data:
             # pylint: disable=cell-var-from-loop
             doc = xapian.Document()
             termgenerator.set_document(doc)
@@ -230,7 +327,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int) -> None:
             trait["additive"].bind(partial(add_additive, doc))
 
             # Index free text.
-            for key in ["description", "tissue", "dataset_fullname"]:
+            for key in ["description", "tissue", "dataset"]:
                 trait[key].bind(index_text)
             trait.pop("probe_target_description").bind(index_text)
             for key in ["name", "symbol", "species", "group"]:
@@ -242,11 +339,23 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int) -> None:
             trait["species"].bind(index_species)
             trait["group"].bind(index_group)
             trait["tissue"].bind(index_tissue)
-            trait["dataset_fullname"].bind(index_dataset)
+            trait["dataset"].bind(index_dataset)
             trait["symbol"].bind(index_symbol)
             trait["chr"].bind(index_chr)
             trait["geno_chr"].bind(index_peakchr)
 
+            Maybe.apply(index_from_dictionary).to_arguments(
+                    Just((trait["species"].value, trait["symbol"].value)),
+                    Just("XRF"),
+                    Just(rif_cache)
+                    )
+
+            Maybe.apply(index_from_dictionary).to_arguments(
+                    Just((trait["species"].value, trait["symbol"].value)),
+                    Just("XWK"),
+                    Just(wiki_cache)
+                    )
+
             doc.set_data(json.dumps(trait.data))
             (Maybe.apply(curry(2, lambda name, dataset: f"{name}:{dataset}"))
              .to_arguments(trait["name"], trait["dataset"])
@@ -257,7 +366,8 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int) ->
     """Index phenotypes data into a Xapian index."""
     with locked_xapian_writable_database(
             xapian_build_directory / f"phenotypes-{chunk_index:04d}") as db:
-        for trait in data:
+
+        for trait in mysql_data:
             # pylint: disable=cell-var-from-loop
             doc = xapian.Document()
             termgenerator.set_document(doc)
@@ -270,7 +380,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int) ->
             trait["year"].bind(partial(add_year, doc))
 
             # Index free text.
-            for key in ["description", "authors", "dataset_fullname"]:
+            for key in ["description", "authors", "dataset"]:
                 trait[key].bind(index_text)
             for key in ["Abstract", "Title"]:
                 trait.pop(key).bind(index_text)
@@ -284,7 +394,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int) ->
             trait["group"].bind(index_group)
             trait["authors"].bind(index_authors)
             trait["geno_chr"].bind(index_peakchr)
-            trait["dataset_fullname"].bind(index_dataset)
+            trait["dataset"].bind(index_dataset)
 
             # Convert name from integer to string.
             trait["name"] = trait["name"].map(str)
@@ -320,12 +430,16 @@ def worker_queue(number_of_workers: int = os.cpu_count() or 1) -> Generator:
         process.join()
 
 
-def index_query(index_function: Callable, query: SQLQuery,
-                xapian_build_directory: pathlib.Path, sql_uri: str, start: int = 0) -> None:
+def index_query(index_function: Callable[[pathlib.Path, int], None], query: SQLQuery,
+                xapian_build_directory: pathlib.Path, sql_uri: str,
+                sparql_uri: str, start: int = 0) -> None:
     """Run SQL query, and index its results for Xapian."""
     i = start
+    default_no_of_workers = os.cpu_count() or 1
+    no_of_workers = min(default_no_of_workers, PROCESS_COUNT_LIMIT)
+
     try:
-        with worker_queue() as spawn_worker:
+        with worker_queue(no_of_workers) as spawn_worker:
             with database_connection(sql_uri) as conn:
                 for chunk in group(query_sql(conn, serialize_sql(
                         # KLUDGE: MariaDB does not allow an offset
@@ -335,9 +449,8 @@ def index_query(index_function: Callable, query: SQLQuery,
                                        offset=start*DOCUMENTS_PER_CHUNK)),
                                                    server_side=True),
                                    DOCUMENTS_PER_CHUNK):
-                    # pylint: disable=global-statement
-                    global data
-                    data = chunk
+                    global mysql_data
+                    mysql_data = chunk
                     spawn_worker(index_function, (xapian_build_directory, i))
                     logging.debug("Spawned worker process on chunk %s", i)
                     i += 1
@@ -347,7 +460,7 @@ def index_query(index_function: Callable, query: SQLQuery,
     except MySQLdb._exceptions.OperationalError:
         logging.warning("Reopening connection to recovering from SQL operational error",
                         exc_info=True)
-        index_query(index_function, query, xapian_build_directory, sql_uri, i)
+        index_query(index_function, query, xapian_build_directory, sql_uri, sparql_uri, i)
 
 
 @contextlib.contextmanager
@@ -357,12 +470,33 @@ def temporary_directory(prefix: str, parent_directory: str) -> Generator:
         yield pathlib.Path(tmpdirname)
 
 
+def parallel_xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) -> None:
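+    """Compact INDICES in parallel batches, then combine the intermediate results into COMBINED_INDEX."""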
+    # We found that compacting 50 files of ~600MB has decent performance
+    no_of_workers = 20
+    file_groupings = 50
+    with temporary_directory("parallel_combine", str(combined_index)) as parallel_combine:
+        parallel_combine.mkdir(parents=True, exist_ok=True)
+        with worker_queue(no_of_workers) as spawn_worker:
+            i = 0
+            while i < len(indices):
+                end_index = (i + file_groupings)
+                files = indices[i:end_index]
+                last_item_idx = i + len(files)
+                spawn_worker(xapian_compact, (parallel_combine / f"{i}_{last_item_idx}", files))
+                logging.debug("Spawned worker to compact files from %s to %s", i, last_item_idx)
+                i = end_index
+        logging.debug("Completed parallel xapian compacts")
+        xapian_compact(combined_index, list(parallel_combine.iterdir()))
+
+
 def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) -> None:
     """Compact and combine several Xapian indices."""
     # xapian-compact opens all indices simultaneously. So, raise the limit on
     # the number of open files.
     soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
     resource.setrlimit(resource.RLIMIT_NOFILE, (max(soft, min(10*len(indices), hard)), hard))
+    combined_index.mkdir(parents=True, exist_ok=True)
+    start = time.monotonic()
     db = xapian.Database()
     try:
         for index in indices:
@@ -370,32 +504,73 @@ def xapian_compact(combined_index: pathlib.Path, indices: List[pathlib.Path]) ->
         db.compact(str(combined_index), xapian.DBCOMPACT_MULTIPASS | xapian.Compactor.FULLER)
     finally:
         db.close()
+    logging.debug("Removing databases that were compacted into %s", combined_index.name)
+    for folder in indices:
+        shutil.rmtree(folder)
+    logging.debug("Completed xapian-compact %s; handled %s files in %s minutes", combined_index.name, len(indices), (time.monotonic() - start) / 60)
+
+
+@click.command(help="Verify checksums and return True when the data has been changed.")
+@click.argument("xapian_directory")
+@click.argument("sql_uri")
+@click.argument("sparql_uri")
+def is_data_modified(xapian_directory: str,
+                     sql_uri: str,
+                     sparql_uri: str) -> None:
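+    """Exit with status 0 when the indexed data has changed; exit 1 otherwise."""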
+    dir_ = pathlib.Path(xapian_directory)
+    with locked_xapian_writable_database(dir_) as db, database_connection(sql_uri) as conn:
+        checksums = "-1"
+        if db.get_metadata('tables'):
+            checksums = " ".join([
+                str(result["Checksum"].value)
+                for result in query_sql(
+                        conn,
+                        f"CHECKSUM TABLE {', '.join(db.get_metadata('tables').decode().split())}")
+            ])
+        # Return a zero exit status code when the data has changed;
+        # otherwise exit with a 1 exit status code.
+        generif = pathlib.Path("/var/lib/data/")
+        if (db.get_metadata("generif-checksum").decode() == md5hash_ttl_dir(generif) and
+            db.get_metadata("checksums").decode() == checksums):
+            sys.exit(1)
+        sys.exit(0)
 
 
 @click.command(help="Index GeneNetwork data and build Xapian search index in XAPIAN_DIRECTORY.")
 @click.argument("xapian_directory")
 @click.argument("sql_uri")
+@click.argument("sparql_uri")
 # pylint: disable=missing-function-docstring
-def main(xapian_directory: str, sql_uri: str) -> None:
+def create_xapian_index(xapian_directory: str, sql_uri: str,
+                        sparql_uri: str) -> None:
     logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG"),
-                        format='%(relativeCreated)s: %(levelname)s: %(message)s')
+                        format='%(asctime)s %(levelname)s: %(message)s',
+                        datefmt='%Y-%m-%d %H:%M:%S %Z')
+    if not pathlib.Path(xapian_directory).exists():
+        pathlib.Path(xapian_directory).mkdir()
 
     # Ensure no other build process is running.
-    if pathlib.Path(xapian_directory).exists():
-        logging.error("Build directory %s already exists; "
+    if any(pathlib.Path(xapian_directory).iterdir()):
+        logging.error("Build directory %s has build files; "
                       "perhaps another build process is running.",
                       xapian_directory)
         sys.exit(1)
 
-    pathlib.Path(xapian_directory).mkdir()
+    start_time = time.perf_counter()
     with temporary_directory("combined", xapian_directory) as combined_index:
         with temporary_directory("build", xapian_directory) as xapian_build_directory:
+            global rif_cache
+            global wiki_cache
+            logging.info("Building wiki cache")
+            wiki_cache = build_rdf_cache(sparql_uri, WIKI_CACHE_QUERY, remove_common_words=True)
+            logging.info("Building rif cache")
+            rif_cache = build_rdf_cache(sparql_uri, RIF_CACHE_QUERY, remove_common_words=True)
             logging.info("Indexing genes")
-            index_query(index_genes, genes_query, xapian_build_directory, sql_uri)
+            index_query(index_genes, genes_query, xapian_build_directory, sql_uri, sparql_uri)
             logging.info("Indexing phenotypes")
-            index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri)
+            index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri, sparql_uri)
             logging.info("Combining and compacting indices")
-            xapian_compact(combined_index, list(xapian_build_directory.iterdir()))
+            parallel_xapian_compact(combined_index, list(xapian_build_directory.iterdir()))
             logging.info("Writing table checksums into index")
             with locked_xapian_writable_database(combined_index) as db:
                 # Build a (deduplicated) set of all tables referenced in
@@ -409,11 +584,27 @@ def main(xapian_directory: str, sql_uri: str) -> None:
                     ]
                 db.set_metadata("tables", " ".join(tables))
                 db.set_metadata("checksums", " ".join(checksums))
+                logging.info("Writing generif checksums into index")
+                db.set_metadata(
+                    "generif-checksum",
+                    md5hash_ttl_dir(pathlib.Path("/var/lib/data/")).encode())
         for child in combined_index.iterdir():
             shutil.move(child, xapian_directory)
     logging.info("Index built")
+    end_time = time.perf_counter()
+    index_time = datetime.timedelta(seconds=end_time - start_time)
+    logging.info(f"Time to Index: {index_time}")
+
+
+@click.group()
+def cli():
+    """Command-line interface for building and checking the GeneNetwork Xapian index."""
+
+
+cli.add_command(is_data_modified)
+cli.add_command(create_xapian_index)
 
 
 if __name__ == "__main__":
     # pylint: disable=no-value-for-parameter
-    main()
+    cli()
diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index ea2c345..2ac8faa 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -3,8 +3,6 @@ library(qtl)
 library(stringi)
 library(stringr)
 
-tmp_dir = Sys.getenv("TMPDIR")
-
 option_list = list(
   make_option(c("-g", "--geno"), type="character", help=".geno file containing a dataset's genotypes"),
   make_option(c("-p", "--pheno"), type="character", help="File containing two columns - sample names and values"),
@@ -18,7 +16,7 @@ option_list = list(
   make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"),
   make_option(c("-s", "--scale"), type="character", default="mb", help="Mapping scale - Megabases (Mb) or Centimorgans (cM)"),
   make_option(c("--control"), type="character", default=NULL, help="Name of marker (contained in genotype file) to be used as a control"),
-  make_option(c("-o", "--outdir"), type="character", default=file.path(tmp_dir, "output"), help="Directory in which to write result file"),
+  make_option(c("-o", "--outdir"), type="character", default=NULL, help="Directory in which to write result file"),
   make_option(c("-f", "--filename"), type="character", default=NULL, help="Name to use for result file"),
   make_option(c("-v", "--verbose"), action="store_true", default=NULL, help="Show extra information")
 );
@@ -58,7 +56,7 @@ geno_file = opt$geno
 pheno_file = opt$pheno
 
 # Generate randomized filename for cross object
-cross_file = file.path(tmp_dir, "cross", paste(stri_rand_strings(1, 8), ".cross", sep = ""))
+cross_file = file.path(opt$outdir, "cross", paste(stri_rand_strings(1, 8), ".cross", sep = ""))
 
 trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }
 
@@ -258,9 +256,9 @@ if (!is.null(opt$pairscan)) {
 # Calculate permutations
 if (opt$nperm > 0) {
   if (!is.null(opt$filename)){
-    perm_out_file = file.path(opt$outdir, paste("PERM_", opt$filename, sep = "" ))
+    perm_out_file = file.path(opt$outdir, "output", paste("PERM_", opt$filename, sep = "" ))
   } else {
-    perm_out_file = file.path(opt$outdir, paste(pheno_name, "_PERM_", stri_rand_strings(1, 8), sep = ""))
+    perm_out_file = file.path(opt$outdir, "output", paste(pheno_name, "_PERM_", stri_rand_strings(1, 8), sep = ""))
   }
 
   if (!is.null(opt$addcovar) || !is.null(opt$control)){
@@ -284,9 +282,9 @@ if (opt$nperm > 0) {
 }
 
 if (!is.null(opt$filename)){
-  out_file = file.path(opt$outdir, opt$filename)
+  out_file = file.path(opt$outdir, "output", opt$filename)
 } else {
-  out_file = file.path(opt$outdir, paste(pheno_name, "_", stri_rand_strings(1, 8), sep = ""))
+  out_file = file.path(opt$outdir, "output", paste(pheno_name, "_", stri_rand_strings(1, 8), sep = ""))
 }
 
 if (!is.null(opt$addcovar) || !is.null(opt$control)){
@@ -299,7 +297,7 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){
 
 verbose_print('Writing results to CSV file\n')
 if (!is.null(opt$pairscan)) {
-  map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" ))
+  map_out_file = file.path(opt$outdir, "output", paste("MAP_", opt$filename, sep = "" ))
   write.csv(qtl_results[1], out_file)
   write.csv(qtl_results[2], map_out_file)
 } else {
diff --git a/scripts/update_rif_table.py b/scripts/update_rif_table.py
new file mode 100755
index 0000000..24edf3d
--- /dev/null
+++ b/scripts/update_rif_table.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+
+"""
+Script responsible for updating the GeneRIF_BASIC table
+"""
+
+import argparse
+import csv
+import datetime
+import gzip
+import logging
+import pathlib
+import os
+from tempfile import TemporaryDirectory
+from typing import Dict, Generator
+
+import requests
+from MySQLdb.cursors import DictCursor
+
+from gn3.db_utils import database_connection
+
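+# Map NCBI taxonomy IDs (mouse, human, rat, arabidopsis) to GeneNetwork SpeciesId values.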
+TAX_IDS = {"10090": 1, "9606": 4, "10116": 2, "3702": 3}
+
+GENE_INFO_URL = "https://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz"
+GENERIFS_BASIC_URL = "https://ftp.ncbi.nih.gov/gene/GeneRIF/generifs_basic.gz"
+
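+# Every row inserted by this script is tagged with this VersionId, so a bad
+# run can be identified (and rolled back) later.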
+VERSION_ID = 5
+
+
+INSERT_QUERY = """ INSERT INTO GeneRIF_BASIC
+(SpeciesId, GeneId, symbol, PubMed_Id, createtime, comment, TaxID, VersionId)
+VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
+"""
+
+
+def download_file(url: str, dest: pathlib.Path):
+    """Saves the contents of url in dest"""
+    with requests.get(url, stream=True) as resp:
+        resp.raise_for_status()
+        with open(dest, "wb") as downloaded_file:
+            for chunk in resp.iter_content(chunk_size=8192):
+                downloaded_file.write(chunk)
+
+
+def read_tsv_file(fname: pathlib.Path) -> Generator:
+    """Load tsv file from NCBI"""
+    with gzip.open(fname, mode="rt") as gz_file:
+        reader = csv.DictReader(gz_file, delimiter="\t", quoting=csv.QUOTE_NONE)
+        yield from reader
+
+
+def parse_gene_info_from_ncbi(fname: pathlib.Path) -> Dict[str, str]:
+    """Parse gene_info into geneid: symbol pairs"""
+    genedict: Dict[str, str] = {}
+    for row in read_tsv_file(fname):
+        if row["#tax_id"] not in TAX_IDS:
+            continue
+        gene_id, symbol = row["GeneID"], row["Symbol"]
+        genedict[gene_id] = symbol
+    return genedict
+
+
+def build_already_exists_cache(conn) -> dict:
+    """
+    Build cache for all GeneId, SpeciesID, createtime, PubMed_ID combinations.
+    Helps prevent duplicate inserts.
+    """
+    cache = {}
+    query = """SELECT
+        COUNT(*) as cnt, GeneId, SpeciesId, createtime, PubMed_ID
+        FROM GeneRIF_BASIC
+        GROUP BY GeneId, SpeciesId, createtime, PubMed_Id """
+
+    with conn.cursor(DictCursor) as cursor:
+        cursor.execute(query)
+        while row := cursor.fetchone():
+            key = (
+                str(row["GeneId"]),
+                str(row["SpeciesId"]),
+                row["createtime"],
+                str(row["PubMed_ID"]),
+            )
+            cache[key] = row["cnt"]
+    return cache
+
+
+def should_add_rif_row(row: dict, exists_cache: dict) -> bool:
+    """Checks if we can add a rif_row, prevent duplicate errors from Mysql"""
+    species_id = str(TAX_IDS[row["#Tax ID"]])
+    insert_date = datetime.datetime.fromisoformat(row["last update timestamp"])
+    search_key = (
+        row["Gene ID"],
+        species_id,
+        insert_date,
+        row["PubMed ID (PMID) list"],
+    )
+    if search_key not in exists_cache:
+        exists_cache[search_key] = 1
+        return True
+    return False
+
+
+def update_rif(sqluri: str):
+    """Update GeneRIF_BASIC table"""
+    with TemporaryDirectory() as _tmpdir:
+        tmpdir = pathlib.Path(_tmpdir)
+        gene_info_path = tmpdir / "gene_info.gz"
+        logging.debug("Fetching gene_info data from: %s", GENE_INFO_URL)
+        download_file(GENE_INFO_URL, gene_info_path)
+
+        logging.debug("Fetching gene_rif_basics data from: %s", GENERIFS_BASIC_URL)
+        generif_basics_path = tmpdir / "generif_basics.gz"
+        download_file(
+            GENERIFS_BASIC_URL,
+            generif_basics_path,
+        )
+
+        logging.debug("Parsing gene_info data")
+        genedict = parse_gene_info_from_ncbi(gene_info_path)
+        with database_connection(sql_uri=sqluri) as con:
+            exists_cache = build_already_exists_cache(con)
+            cursor = con.cursor()
+            skipped_if_exists, added = 0, 0
+            for row in read_tsv_file(generif_basics_path):
+                if row["#Tax ID"] not in TAX_IDS:
+                    continue
+                if not should_add_rif_row(row, exists_cache):
+                    skipped_if_exists += 1
+                    continue
+                species_id = TAX_IDS[row["#Tax ID"]]
+                symbol = genedict.get(row["Gene ID"], "")
+                insert_values = (
+                    species_id,  # SpeciesId
+                    row["Gene ID"],  # GeneId
+                    symbol,  # symbol
+                    row["PubMed ID (PMID) list"],  # PubMed_ID
+                    row["last update timestamp"],  # createtime
+                    row["GeneRIF text"],  # comment
+                    row["#Tax ID"],  # TaxID
+                    VERSION_ID,  # VersionId
+                )
+                cursor.execute(INSERT_QUERY, insert_values)
+                added += 1
+                if added % 40_000 == 0:
+                    logging.debug("Added 40,000 rows to database")
+        logging.info(
+            "Generif_BASIC table updated. Added %s. Skipped %s because they "
+            "already exists. In case of error, you can use VersionID=%s to find "
+            "rows inserted with this script", added, skipped_if_exists,
+            VERSION_ID
+        )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(
+        level=os.environ.get("LOGLEVEL", "DEBUG"),
+        format="%(asctime)s %(levelname)s: %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S %Z",
+    )
+    parser = argparse.ArgumentParser(description="Update the GeneRIF_BASIC table")
+    parser.add_argument(
+        "--sql-uri",
+        required=True,
+        help="MYSQL uri path in the form mysql://user:password@localhost/gn2",
+    )
+    args = parser.parse_args()
+    update_rif(args.sql_uri)