author     Arun Isaac  2022-10-26 14:23:34 +0530
committer  Arun Isaac  2022-10-28 13:20:13 +0530
commit     60e2ca08beb9a9d5795f0a44df096fc6c0bc2583 (patch)
tree       9af459b0b06c50b7cbb0e64f73318993f905cf67 /gn3/api
parent     6a9db0c1734a22e792f2204c029810ca1cea7f86 (diff)
download   genenetwork3-60e2ca08beb9a9d5795f0a44df096fc6c0bc2583.tar.gz
Add search.
* gn3/api/search.py: New file.
* gn3/app.py: Register the search blueprint.
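The gn3/app.py side of this commit is not shown below (the diffstat is limited to gn3/api). As a rough illustration only, registering a Flask blueprint in an application factory could look like the following sketch; the create_app name and the /search URL prefix are assumptions, not the actual contents of gn3/app.py.

    # Hypothetical sketch of registering the search blueprint; the real
    # gn3/app.py change is not part of this diff (limited to gn3/api).
    from flask import Flask

    from gn3.api.search import search

    def create_app() -> Flask:
        app = Flask(__name__)
        # URL prefix is an assumption; it depends on how gn3/app.py
        # actually mounts the blueprint.
        app.register_blueprint(search, url_prefix="/search")
        return app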
Diffstat (limited to 'gn3/api')
-rw-r--r--  gn3/api/search.py  62
1 file changed, 62 insertions(+), 0 deletions(-)
diff --git a/gn3/api/search.py b/gn3/api/search.py
new file mode 100644
index 0000000..78a3245
--- /dev/null
+++ b/gn3/api/search.py
@@ -0,0 +1,62 @@
+"""Search using Xapian index."""
+
+import json
+import urllib.parse
+
+from flask import abort, Blueprint, jsonify, request
+import xapian
+
+from gn3.monads import MonadicDict
+from gn3.db_utils import xapian_database
+
+search = Blueprint("search", __name__)
+
+@search.route("/")
+def search_results():
+ """Search Xapian index and return a list of results."""
+ args = request.args
+ search_type = args.get("type", default="gene")
+ querystring = args.get("query", default="")
+ page = args.get("page", default=1, type=int)
+ if page < 1:
+ abort(404, description="Requested page does not exist")
+ results_per_page = args.get("per_page", default=100, type=int)
+ maximum_results_per_page = 10000
+ if results_per_page > maximum_results_per_page:
+ abort(400, description="Requested too many search results")
+
+ queryparser = xapian.QueryParser()
+ queryparser.set_stemmer(xapian.Stem("en"))
+ queryparser.set_stemming_strategy(queryparser.STEM_SOME)
+ queryparser.add_boolean_prefix("author", "A")
+ queryparser.add_boolean_prefix("species", "XS")
+ queryparser.add_boolean_prefix("group", "XG")
+ queryparser.add_boolean_prefix("tissue", "XI")
+ queryparser.add_boolean_prefix("dataset", "XDS")
+ queryparser.add_boolean_prefix("symbol", "XY")
+ queryparser.add_boolean_prefix("chr", "XC")
+ queryparser.add_boolean_prefix("peakchr", "XPC")
+ queryparser.add_prefix("description", "XD")
+ for i, prefix in enumerate(["mean:", "peak:", "mb:", "peakmb:", "additive:", "year:"]):
+ queryparser.add_rangeprocessor(xapian.NumberRangeProcessor(i, prefix))
+ query = queryparser.parse_query(querystring)
+ traits = []
+ # pylint: disable=invalid-name
+ with xapian_database() as db:
+ enquire = xapian.Enquire(db)
+ # Filter documents by type.
+ enquire.set_query(xapian.Query(xapian.Query.OP_FILTER,
+ query,
+ xapian.Query(f"XT{search_type}")))
+ for xapian_match in enquire.get_mset((page-1)*results_per_page, results_per_page):
+ trait = MonadicDict(json.loads(xapian_match.document.get_data()))
+ # Add PubMed link to phenotype search results.
+ if search_type == "phenotype":
+ trait["pubmed_link"] = trait["pubmed_id"].map(
+ lambda pubmed_id: "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?"
+ + urllib.parse.urlencode({"cmd": "Retrieve",
+ "db": "PubMed",
+ "list_uids": pubmed_id,
+ "dopt": "Abstract"}))
+ traits.append(trait.data)
+ return jsonify(traits)
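For illustration, a minimal sketch of calling this endpoint from Python follows. The host, port, and /search URL prefix are assumptions (they depend on how gn3/app.py registers the blueprint and how the server is deployed); the query string uses the field prefixes (species:, author:, ...) and numeric ranges (mean:, peak:, ...) that the endpoint configures above.

    # Hypothetical client-side usage; base URL and prefix are assumptions.
    import requests

    response = requests.get(
        "http://localhost:8080/search/",
        params={
            # Free-text query with a boolean prefix and a numeric range.
            "query": "cytochrome species:mouse mean:10..20",
            "type": "gene",      # filters documents by their XT{type} term
            "page": 1,           # 1-based page number
            "per_page": 100,     # capped at 10000 by the endpoint
        },
        timeout=30,
    )
    response.raise_for_status()
    for trait in response.json():
        print(trait)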