# Extracted from a cgit (v1.2.3) patch view of gn3/api/search.py.
#
# Commit:  60e2ca08beb9a9d5795f0a44df096fc6c0bc2583
# From:    Arun Isaac
# Date:    Wed, 26 Oct 2022 14:23:34 +0530
# Subject: Add search.
#
#   * gn3/api/search.py: New file.
#   * gn3/app.py: Register the search blueprint.
"""Search using Xapian index."""

import json
import urllib.parse

from flask import abort, Blueprint, jsonify, request
import xapian

from gn3.monads import MonadicDict
from gn3.db_utils import xapian_database

search = Blueprint("search", __name__)

# Upper bound on the "per_page" request parameter.
_MAXIMUM_RESULTS_PER_PAGE = 10000

# (query field name, index term prefix) pairs registered as boolean
# prefixes on the query parser.
_BOOLEAN_PREFIXES = (
    ("author", "A"),
    ("species", "XS"),
    ("group", "XG"),
    ("tissue", "XI"),
    ("dataset", "XDS"),
    ("symbol", "XY"),
    ("chr", "XC"),
    ("peakchr", "XPC"),
)

# Range prefixes understood by the query parser. The position of each
# prefix in this tuple is the slot number handed to its
# NumberRangeProcessor, so the order must not change.
_RANGE_PREFIXES = ("mean:", "peak:", "mb:", "peakmb:", "additive:", "year:")


def _build_queryparser():
    """Return a Xapian query parser configured with the search prefixes."""
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for field, term_prefix in _BOOLEAN_PREFIXES:
        queryparser.add_boolean_prefix(field, term_prefix)
    queryparser.add_prefix("description", "XD")
    for slot, range_prefix in enumerate(_RANGE_PREFIXES):
        queryparser.add_rangeprocessor(
            xapian.NumberRangeProcessor(slot, range_prefix))
    return queryparser


def _pubmed_link(pubmed_id):
    """Return the PubMed abstract URL for pubmed_id."""
    return ("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?"
            + urllib.parse.urlencode({"cmd": "Retrieve",
                                      "db": "PubMed",
                                      "list_uids": pubmed_id,
                                      "dopt": "Abstract"}))


@search.route("/")
def search_results():
    """Search Xapian index and return a list of results.

    Query parameters read from the request:

    - type: document type to filter on (default "gene").
    - query: the query string handed to the Xapian query parser
      (default "").
    - page: 1-based page number (default 1).
    - per_page: number of results per page (default 100).

    Returns a JSON list with one object per matching document, decoded
    from the JSON stored as the document data. When type is
    "phenotype", each object additionally gets a "pubmed_link" entry
    derived from its "pubmed_id".

    Aborts with 404 if page is less than 1, and with 400 if per_page is
    negative or exceeds the maximum, or if the query cannot be parsed.
    """
    args = request.args
    search_type = args.get("type", default="gene")
    querystring = args.get("query", default="")
    page = args.get("page", default=1, type=int)
    if page < 1:
        abort(404, description="Requested page does not exist")
    results_per_page = args.get("per_page", default=100, type=int)
    # A negative count cannot be passed on to get_mset; reject it here
    # as a client error instead of letting it fail further down.
    if results_per_page < 0:
        abort(400, description="Requested a negative number of search results")
    if results_per_page > _MAXIMUM_RESULTS_PER_PAGE:
        abort(400, description="Requested too many search results")

    queryparser = _build_queryparser()
    try:
        query = queryparser.parse_query(querystring)
    except xapian.QueryParserError:
        # The query string comes straight from the client, so a parse
        # failure is a bad request, not a server error.
        abort(400, description="Could not parse search query")

    traits = []
    # pylint: disable=invalid-name
    with xapian_database() as db:
        enquire = xapian.Enquire(db)
        # Filter documents by type.
        enquire.set_query(xapian.Query(xapian.Query.OP_FILTER,
                                       query,
                                       xapian.Query(f"XT{search_type}")))
        first_result = (page - 1) * results_per_page
        for xapian_match in enquire.get_mset(first_result, results_per_page):
            trait = MonadicDict(json.loads(xapian_match.document.get_data()))
            # Add PubMed link to phenotype search results.
            if search_type == "phenotype":
                trait["pubmed_link"] = trait["pubmed_id"].map(_pubmed_link)
            traits.append(trait.data)
    return jsonify(traits)