about summary refs log tree commit diff
diff options
context:
space:
mode:
author Arun Isaac 2022-10-28 14:55:56 +0530
committer Arun Isaac 2022-10-28 14:55:56 +0530
commit fcf257bff816703433d10b942f959dbb78f6c5e3 (patch)
tree 1bbdf71c63078b42974b2d871e850e97b55c60e3
parent 38e7b62c2e94fe4ae1dbaa91349b7b36792685ac (diff)
download genenetwork2-fcf257bff816703433d10b942f959dbb78f6c5e3.tar.gz
Query genenetwork3 API for search.
genenetwork2 should be a thin wrapper web UI around the genenetwork3 API. Hence, this move.

* etc/default_settings.py (XAPIAN_DB_PATH): Delete variable.
* wqflask/wqflask/database.py: Remove xapian import.
(xapian_database, xapian_writable_database): Delete functions.
* wqflask/wqflask/gsearch.py: Do not import json, xapian, Nothing from pymonad.maybe, base, utility.authentication_tools and wqflask.database. Import MonadicDict from gn3.monads instead of utility.monads and GN3_LOCAL_URL from utility.tools.
(GSearch.__init__): Query genenetwork3 search API instead of directly reading a Xapian index.
-rw-r--r-- etc/default_settings.py 2
-rw-r--r-- wqflask/wqflask/database.py 23
-rw-r--r-- wqflask/wqflask/gsearch.py 97
3 files changed, 36 insertions, 86 deletions
diff --git a/etc/default_settings.py b/etc/default_settings.py
index ab5a06cf..6d7ac063 100644
--- a/etc/default_settings.py
+++ b/etc/default_settings.py
@@ -35,8 +35,6 @@ GN2_PROXY = "http://localhost:8080"
# GN PROXY
GN_PROXY_URL="https://genenetwork.org/gn3-proxy/"
-# Xapian index
-XAPIAN_DB_PATH = "xapian"
# ---- MySQL
diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py
index 7e03a000..663e2ebf 100644
--- a/wqflask/wqflask/database.py
+++ b/wqflask/wqflask/database.py
@@ -8,7 +8,6 @@ import contextlib
#: type: ignore
import MySQLdb
-import xapian
class Connection(Protocol):
@@ -61,25 +60,3 @@ def database_connection() -> Iterator[Connection]:
yield connection
finally:
connection.close()
-
-
-@contextlib.contextmanager
-def xapian_database():
- """Open xapian database read-only."""
- # pylint: disable-next=invalid-name
- db = xapian.Database(get_setting("XAPIAN_DB_PATH"))
- try:
- yield db
- finally:
- db.close()
-
-
-@contextlib.contextmanager
-def xapian_writable_database():
- """Open xapian database for writing."""
- # pylint: disable-next=invalid-name
- db = xapian.WritableDatabase(get_setting("XAPIAN_DB_PATH"))
- try:
- yield db
- finally:
- db.close()
diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py
index 4efa6740..202d2670 100644
--- a/wqflask/wqflask/gsearch.py
+++ b/wqflask/wqflask/gsearch.py
@@ -1,13 +1,11 @@
-import json
+from urllib.parse import urlencode, urljoin
-from pymonad.maybe import Just, Maybe, Nothing
+from pymonad.maybe import Just, Maybe
from pymonad.tools import curry
-import xapian
+import requests
-from base import webqtlConfig
-from utility.authentication_tools import check_resource_availability
-from utility.monads import MonadicDict
-from wqflask.database import xapian_database
+from gn3.monads import MonadicDict
+from utility.tools import GN3_LOCAL_URL
# KLUDGE: Due to the lack of pagination, we hard-limit the maximum
# number of search results.
@@ -20,22 +18,6 @@ class GSearch:
self.type = kwargs["type"]
self.terms = kwargs["terms"]
- queryparser = xapian.QueryParser()
- queryparser.set_stemmer(xapian.Stem("en"))
- queryparser.set_stemming_strategy(queryparser.STEM_SOME)
- queryparser.add_boolean_prefix("author", "A")
- queryparser.add_boolean_prefix("species", "XS")
- queryparser.add_boolean_prefix("group", "XG")
- queryparser.add_boolean_prefix("tissue", "XI")
- queryparser.add_boolean_prefix("dataset", "XDS")
- queryparser.add_boolean_prefix("symbol", "XY")
- queryparser.add_boolean_prefix("chr", "XC")
- queryparser.add_boolean_prefix("peakchr", "XPC")
- queryparser.add_prefix("description", "XD")
- for i, prefix in enumerate(["mean:", "peak:", "mb:", "peakmb:", "additive:", "year:"]):
- queryparser.add_rangeprocessor(xapian.NumberRangeProcessor(i, prefix))
- querystring = self.terms
- query = queryparser.parse_query(querystring)
# FIXME: Handle presentation (that is, formatting strings for
# display) in the template rendering, not when retrieving
# search results.
@@ -44,41 +26,34 @@ class GSearch:
hmac = curry(2, lambda dataset, dataset_fullname: f"{dataset_fullname}:{dataset}")
convert_lod = lambda x: x / 4.61
self.trait_list = []
- # pylint: disable=invalid-name
- with xapian_database() as db:
- enquire = xapian.Enquire(db)
- # Filter documents by type.
- enquire.set_query(xapian.Query(xapian.Query.OP_FILTER,
- query,
- xapian.Query(f"XT{self.type}")))
- for i, trait in enumerate(
- [MonadicDict(json.loads(xapian_match.document.get_data()))
- for xapian_match in enquire.get_mset(0, MAX_SEARCH_RESULTS)]):
- trait["index"] = Just(i)
- trait["location_repr"] = (Maybe.apply(chr_mb)
- .to_arguments(trait.pop("chr"), trait.pop("mb")))
- trait["LRS_score_repr"] = trait.pop("lrs").map(convert_lod).map(format3f)
- trait["additive"] = trait["additive"].map(format3f)
- trait["mean"] = trait["mean"].map(format3f)
- trait["max_lrs_text"] = (Maybe.apply(chr_mb)
- .to_arguments(trait.pop("geno_chr"), trait.pop("geno_mb")))
- if self.type == "gene":
- trait["hmac"] = (Maybe.apply(hmac)
- .to_arguments(trait["dataset"], trait["dataset_fullname"]))
- elif self.type == "phenotype":
- trait["display_name"] = trait["name"]
- inbredsetcode = trait.pop("inbredsetcode")
- if inbredsetcode.map(len) == Just(3):
- trait["display_name"] = (Maybe.apply(
- curry(2, lambda inbredsetcode, name: f"{inbredsetcode}_{name}"))
- .to_arguments(inbredsetcode, trait["name"]))
- trait["hmac"] = (Maybe.apply(hmac)
- .to_arguments(trait.pop("dataset_fullname"), trait["name"]))
- trait["authors_display"] = (trait.pop("authors").map(
- lambda authors:
- ", ".join(authors[:2] + ["et al."] if len(authors) >=2 else authors)))
- trait["pubmed_text"] = trait["year"].map(str)
- trait["pubmed_link"] = (trait["pubmed_id"].map(
- lambda pubmedid: webqtlConfig.PUBMEDLINK_URL % pubmedid))
- self.trait_list.append(trait.data)
- self.trait_count = len(self.trait_list)
+ for i, trait in enumerate(requests.get(
+ urljoin(GN3_LOCAL_URL, "/api/search?" + urlencode({"query": self.terms,
+ "type": self.type,
+ "per_page": MAX_SEARCH_RESULTS}))).json()):
+ trait = MonadicDict(trait)
+ trait["index"] = Just(i)
+ trait["location_repr"] = (Maybe.apply(chr_mb)
+ .to_arguments(trait.pop("chr"), trait.pop("mb")))
+ trait["LRS_score_repr"] = trait.pop("lrs").map(convert_lod).map(format3f)
+ trait["additive"] = trait["additive"].map(format3f)
+ trait["mean"] = trait["mean"].map(format3f)
+ trait["max_lrs_text"] = (Maybe.apply(chr_mb)
+ .to_arguments(trait.pop("geno_chr"), trait.pop("geno_mb")))
+ if self.type == "gene":
+ trait["hmac"] = (Maybe.apply(hmac)
+ .to_arguments(trait["dataset"], trait["dataset_fullname"]))
+ elif self.type == "phenotype":
+ trait["display_name"] = trait["name"]
+ inbredsetcode = trait.pop("inbredsetcode")
+ if inbredsetcode.map(len) == Just(3):
+ trait["display_name"] = (Maybe.apply(
+ curry(2, lambda inbredsetcode, name: f"{inbredsetcode}_{name}"))
+ .to_arguments(inbredsetcode, trait["name"]))
+ trait["hmac"] = (Maybe.apply(hmac)
+ .to_arguments(trait.pop("dataset_fullname"), trait["name"]))
+ trait["authors_display"] = (trait.pop("authors").map(
+ lambda authors:
+ ", ".join(authors[:2] + ["et al."] if len(authors) >=2 else authors)))
+ trait["pubmed_text"] = trait["year"].map(str)
+ self.trait_list.append(trait.data)
+ self.trait_count = len(self.trait_list)