From fcf257bff816703433d10b942f959dbb78f6c5e3 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 28 Oct 2022 14:55:56 +0530 Subject: Query genenetwork3 API for search. genenetwork2 should be a thin wrapper web UI around the genenetwork3 API. Hence, this move. * etc/default_settings.py (XAPIAN_DB_PATH): Delete variable. * wqflask/wqflask/database.py: Remove xapian import. (xapian_database, xapian_writable_database): Delete functions. * wqflask/wqflask/gsearch.py: Do not import json, xapian, Nothing from pymonad.maybe, base, utility.authentication_tools and wqflask.database. Import MonadicDict from gn3.monads instead of utility.monads and GN3_LOCAL_URL from utility.tools. (GSearch.__init__): Query genenetwork3 search API instead of directly reading a Xapian index. --- wqflask/wqflask/database.py | 23 ----------- wqflask/wqflask/gsearch.py | 97 +++++++++++++++++---------------------------- 2 files changed, 36 insertions(+), 84 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py index 7e03a000..663e2ebf 100644 --- a/wqflask/wqflask/database.py +++ b/wqflask/wqflask/database.py @@ -8,7 +8,6 @@ import contextlib #: type: ignore import MySQLdb -import xapian class Connection(Protocol): @@ -61,25 +60,3 @@ def database_connection() -> Iterator[Connection]: yield connection finally: connection.close() - - -@contextlib.contextmanager -def xapian_database(): - """Open xapian database read-only.""" - # pylint: disable-next=invalid-name - db = xapian.Database(get_setting("XAPIAN_DB_PATH")) - try: - yield db - finally: - db.close() - - -@contextlib.contextmanager -def xapian_writable_database(): - """Open xapian database for writing.""" - # pylint: disable-next=invalid-name - db = xapian.WritableDatabase(get_setting("XAPIAN_DB_PATH")) - try: - yield db - finally: - db.close() diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 4efa6740..202d2670 100644 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -1,13 +1,11 @@ -import json +from urllib.parse import urlencode, urljoin -from pymonad.maybe import Just, Maybe, Nothing +from pymonad.maybe import Just, Maybe from pymonad.tools import curry -import xapian +import requests -from base import webqtlConfig -from utility.authentication_tools import check_resource_availability -from utility.monads import MonadicDict -from wqflask.database import xapian_database +from gn3.monads import MonadicDict +from utility.tools import GN3_LOCAL_URL # KLUDGE: Due to the lack of pagination, we hard-limit the maximum # number of search results. @@ -20,22 +18,6 @@ class GSearch: self.type = kwargs["type"] self.terms = kwargs["terms"] - queryparser = xapian.QueryParser() - queryparser.set_stemmer(xapian.Stem("en")) - queryparser.set_stemming_strategy(queryparser.STEM_SOME) - queryparser.add_boolean_prefix("author", "A") - queryparser.add_boolean_prefix("species", "XS") - queryparser.add_boolean_prefix("group", "XG") - queryparser.add_boolean_prefix("tissue", "XI") - queryparser.add_boolean_prefix("dataset", "XDS") - queryparser.add_boolean_prefix("symbol", "XY") - queryparser.add_boolean_prefix("chr", "XC") - queryparser.add_boolean_prefix("peakchr", "XPC") - queryparser.add_prefix("description", "XD") - for i, prefix in enumerate(["mean:", "peak:", "mb:", "peakmb:", "additive:", "year:"]): - queryparser.add_rangeprocessor(xapian.NumberRangeProcessor(i, prefix)) - querystring = self.terms - query = queryparser.parse_query(querystring) # FIXME: Handle presentation (that is, formatting strings for # display) in the template rendering, not when retrieving # search results. @@ -44,41 +26,34 @@ class GSearch: hmac = curry(2, lambda dataset, dataset_fullname: f"{dataset_fullname}:{dataset}") convert_lod = lambda x: x / 4.61 self.trait_list = [] - # pylint: disable=invalid-name - with xapian_database() as db: - enquire = xapian.Enquire(db) - # Filter documents by type. - enquire.set_query(xapian.Query(xapian.Query.OP_FILTER, - query, - xapian.Query(f"XT{self.type}"))) - for i, trait in enumerate( - [MonadicDict(json.loads(xapian_match.document.get_data())) - for xapian_match in enquire.get_mset(0, MAX_SEARCH_RESULTS)]): - trait["index"] = Just(i) - trait["location_repr"] = (Maybe.apply(chr_mb) - .to_arguments(trait.pop("chr"), trait.pop("mb"))) - trait["LRS_score_repr"] = trait.pop("lrs").map(convert_lod).map(format3f) - trait["additive"] = trait["additive"].map(format3f) - trait["mean"] = trait["mean"].map(format3f) - trait["max_lrs_text"] = (Maybe.apply(chr_mb) - .to_arguments(trait.pop("geno_chr"), trait.pop("geno_mb"))) - if self.type == "gene": - trait["hmac"] = (Maybe.apply(hmac) - .to_arguments(trait["dataset"], trait["dataset_fullname"])) - elif self.type == "phenotype": - trait["display_name"] = trait["name"] - inbredsetcode = trait.pop("inbredsetcode") - if inbredsetcode.map(len) == Just(3): - trait["display_name"] = (Maybe.apply( - curry(2, lambda inbredsetcode, name: f"{inbredsetcode}_{name}")) - .to_arguments(inbredsetcode, trait["name"])) - trait["hmac"] = (Maybe.apply(hmac) - .to_arguments(trait.pop("dataset_fullname"), trait["name"])) - trait["authors_display"] = (trait.pop("authors").map( - lambda authors: - ", ".join(authors[:2] + ["et al."] if len(authors) >=2 else authors))) - trait["pubmed_text"] = trait["year"].map(str) - trait["pubmed_link"] = (trait["pubmed_id"].map( - lambda pubmedid: webqtlConfig.PUBMEDLINK_URL % pubmedid)) - self.trait_list.append(trait.data) - self.trait_count = len(self.trait_list) + for i, trait in enumerate(requests.get( + urljoin(GN3_LOCAL_URL, "/api/search?" + urlencode({"query": self.terms, + "type": self.type, + "per_page": MAX_SEARCH_RESULTS}))).json()): + trait = MonadicDict(trait) + trait["index"] = Just(i) + trait["location_repr"] = (Maybe.apply(chr_mb) + .to_arguments(trait.pop("chr"), trait.pop("mb"))) + trait["LRS_score_repr"] = trait.pop("lrs").map(convert_lod).map(format3f) + trait["additive"] = trait["additive"].map(format3f) + trait["mean"] = trait["mean"].map(format3f) + trait["max_lrs_text"] = (Maybe.apply(chr_mb) + .to_arguments(trait.pop("geno_chr"), trait.pop("geno_mb"))) + if self.type == "gene": + trait["hmac"] = (Maybe.apply(hmac) + .to_arguments(trait["dataset"], trait["dataset_fullname"])) + elif self.type == "phenotype": + trait["display_name"] = trait["name"] + inbredsetcode = trait.pop("inbredsetcode") + if inbredsetcode.map(len) == Just(3): + trait["display_name"] = (Maybe.apply( + curry(2, lambda inbredsetcode, name: f"{inbredsetcode}_{name}")) + .to_arguments(inbredsetcode, trait["name"])) + trait["hmac"] = (Maybe.apply(hmac) + .to_arguments(trait.pop("dataset_fullname"), trait["name"])) + trait["authors_display"] = (trait.pop("authors").map( + lambda authors: + ", ".join(authors[:2] + ["et al."] if len(authors) >=2 else authors))) + trait["pubmed_text"] = trait["year"].map(str) + self.trait_list.append(trait.data) + self.trait_count = len(self.trait_list) -- cgit v1.2.3