diff options
Diffstat (limited to 'gn3/api')
-rw-r--r-- | gn3/api/search.py | 62 |
1 files changed, 62 insertions, 0 deletions
# diff --git a/gn3/api/search.py b/gn3/api/search.py
# new file mode 100644, index 0000000..78a3245
"""Search using Xapian index."""

import json
import urllib.parse

from flask import abort, Blueprint, jsonify, request
import xapian

from gn3.monads import MonadicDict
from gn3.db_utils import xapian_database

search = Blueprint("search", __name__)

# Upper bound on the "per_page" request parameter.
MAXIMUM_RESULTS_PER_PAGE = 10000

# (query field, Xapian term prefix) pairs registered as boolean filters.
_BOOLEAN_PREFIXES = (
    ("author", "A"),
    ("species", "XS"),
    ("group", "XG"),
    ("tissue", "XI"),
    ("dataset", "XDS"),
    ("symbol", "XY"),
    ("chr", "XC"),
    ("peakchr", "XPC"),
)

# Range prefixes; the position in this tuple is the value slot number
# handed to the corresponding NumberRangeProcessor.
_RANGE_PREFIXES = ("mean:", "peak:", "mb:", "peakmb:", "additive:", "year:")


def _build_queryparser():
    """Return a Xapian QueryParser set up with the search prefixes."""
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for field, term_prefix in _BOOLEAN_PREFIXES:
        queryparser.add_boolean_prefix(field, term_prefix)
    queryparser.add_prefix("description", "XD")
    for slot, range_prefix in enumerate(_RANGE_PREFIXES):
        queryparser.add_rangeprocessor(
            xapian.NumberRangeProcessor(slot, range_prefix))
    return queryparser


def _pubmed_link(pubmed_id):
    """Return the PubMed abstract URL for the given PubMed identifier."""
    return ("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?"
            + urllib.parse.urlencode({"cmd": "Retrieve",
                                      "db": "PubMed",
                                      "list_uids": pubmed_id,
                                      "dopt": "Abstract"}))


@search.route("/")
def search_results():
    """Search Xapian index and return a list of results.

    Query string parameters:
      type: document type to filter on (default "gene").
      query: search query in Xapian QueryParser syntax (default "").
      page: 1-based page number (default 1).
      per_page: number of results per page (default 100, at most
        MAXIMUM_RESULTS_PER_PAGE).

    Returns a JSON list with one object per matching document. For
    "phenotype" searches every object also gets a "pubmed_link" entry
    derived from its "pubmed_id".

    Aborts with 404 when page is less than 1, and with 400 when per_page
    is negative or too large, or when the query cannot be parsed.
    """
    args = request.args
    search_type = args.get("type", default="gene")
    querystring = args.get("query", default="")
    page = args.get("page", default=1, type=int)
    if page < 1:
        abort(404, description="Requested page does not exist")
    results_per_page = args.get("per_page", default=100, type=int)
    # A negative count cannot be converted to the unsigned document count
    # that get_mset expects; reject it instead of failing with a 500.
    if results_per_page < 0:
        abort(400, description="Requested a negative number of search results")
    if results_per_page > MAXIMUM_RESULTS_PER_PAGE:
        abort(400, description="Requested too many search results")

    queryparser = _build_queryparser()
    try:
        query = queryparser.parse_query(querystring)
    except xapian.QueryParserError as err:
        # Malformed user input is a client error, not a server failure.
        abort(400, description=f"Invalid search query: {err}")

    traits = []
    # pylint: disable=invalid-name
    with xapian_database() as db:
        enquire = xapian.Enquire(db)
        # Filter documents by type.
        enquire.set_query(xapian.Query(xapian.Query.OP_FILTER,
                                       query,
                                       xapian.Query(f"XT{search_type}")))
        first_result = (page - 1) * results_per_page
        for xapian_match in enquire.get_mset(first_result, results_per_page):
            trait = MonadicDict(json.loads(xapian_match.document.get_data()))
            # Add PubMed link to phenotype search results.
            if search_type == "phenotype":
                trait["pubmed_link"] = trait["pubmed_id"].map(_pubmed_link)
            traits.append(trait.data)
    return jsonify(traits)