aboutsummaryrefslogtreecommitdiff
path: root/wqflask/scripts/index.py
diff options
context:
space:
mode:
author Arun Isaac 2022-09-28 21:56:42 +0530
committer Arun Isaac 2022-09-29 16:15:47 +0530
commit d47977406a8c86e9e3afbfeb6b1779b041f7f4f6 (patch)
tree 8ad9703c482627dee6c8757fd0792e770fb936a0 /wqflask/scripts/index.py
parent b23c36a8c7b66296cf972ac7b3dedfe8d427690c (diff)
download genenetwork2-d47977406a8c86e9e3afbfeb6b1779b041f7f4f6.tar.gz
Index prefixed fields and add values.
* wqflask/scripts/index.py (main): Index fields with prefixes, and add values for range queries.
Diffstat (limited to 'wqflask/scripts/index.py')
-rw-r--r-- wqflask/scripts/index.py 47
1 files changed, 46 insertions, 1 deletions
diff --git a/wqflask/scripts/index.py b/wqflask/scripts/index.py
index 37736951..447cc157 100644
--- a/wqflask/scripts/index.py
+++ b/wqflask/scripts/index.py
@@ -32,8 +32,26 @@ def write_document(db, idterm, doctype, doc):
def main():
termgenerator = xapian.TermGenerator()
termgenerator.set_stemmer(xapian.Stem("en"))
+
indexer = partial(index_text, termgenerator)
+ authors_indexer = lambda text: termgenerator.index_text(text, 1, "A")
+ species_indexer = lambda text: termgenerator.index_text(text, 1, "XS")
+ group_indexer = lambda text: termgenerator.index_text(text, 1, "XG")
+ tissue_indexer = lambda text: termgenerator.index_text(text, 1, "XI")
+ description_indexer = lambda text: termgenerator.index_text(text, 1, "XD")
+ dataset_indexer = lambda text: termgenerator.index_text(text, 1, "XDS")
+ symbol_indexer = lambda text: termgenerator.index_text(text, 1, "XY")
+ chr_indexer = lambda text: termgenerator.index_text(text, 0, "XC")
+ peakchr_indexer = lambda text: termgenerator.index_text(text, 0, "XPC")
+
+ mean_adder = lambda mean: doc.add_value(0, xapian.sortable_serialise(mean))
+ peak_adder = lambda peak: doc.add_value(1, xapian.sortable_serialise(peak))
+ mb_adder = lambda mb: doc.add_value(2, xapian.sortable_serialise(mb))
+ peakmb_adder = lambda peakmb: doc.add_value(3, xapian.sortable_serialise(peakmb))
+ additive_adder = lambda additive: doc.add_value(4, xapian.sortable_serialise(additive))
+ year_adder = lambda year: doc.add_value(5, xapian.sortable_serialise(float(year)))
+
# FIXME: Some Max LRS values in the DB are wrongly listed as
# 0.000, but shouldn't be displayed. Make them NULLs in the
# database.
@@ -74,6 +92,13 @@ def main():
doc = xapian.Document()
termgenerator.set_document(doc)
+ # Add values.
+ trait["mean"].bind(mean_adder)
+ trait["lrs"].bind(peak_adder)
+ trait["mb"].bind(mb_adder)
+ trait["geno_mb"].bind(peakmb_adder)
+ trait["additive"].bind(additive_adder)
+
# Index text.
trait["name"].bind(indexer)
trait["description"].bind(indexer)
@@ -82,6 +107,14 @@ def main():
trait.pop("genbankid").bind(indexer)
trait.pop("unigeneid").bind(indexer)
trait.pop("probe_target_description").bind(indexer)
+ trait["species"].bind(species_indexer)
+ trait["group"].bind(group_indexer)
+ trait["tissue"].bind(tissue_indexer)
+ trait["description"].bind(description_indexer)
+ trait["dataset"].bind(dataset_indexer)
+ trait["symbol"].bind(symbol_indexer)
+ trait["chr"].bind(chr_indexer)
+ trait["geno_chr"].bind(peakchr_indexer)
doc.set_data(json.dumps(trait.data))
write_document(db, trait["name"].bind(lambda name: f"Q{name}"), "gene", doc)
@@ -99,7 +132,7 @@ def main():
Publication.Abstract,
Publication.Title,
Publication.Authors AS authors,
- Publication.Year AS year,
+ IF(Publication.Year='', 0, Publication.Year) AS year,
Publication.PubMed_ID AS pubmed_id,
PublishXRef.LRS as lrs,
PublishXRef.additive,
@@ -119,6 +152,13 @@ def main():
doc = xapian.Document()
termgenerator.set_document(doc)
+ # Add values.
+ trait["mean"].bind(mean_adder)
+ trait["lrs"].bind(peak_adder)
+ trait["geno_mb"].bind(peakmb_adder)
+ trait["additive"].bind(additive_adder)
+ trait["year"].bind(year_adder)
+
# Index text.
trait.pop("abbreviation").bind(indexer)
trait["description"].bind(indexer)
@@ -127,6 +167,11 @@ def main():
trait.pop("Title").bind(indexer)
trait["authors"].bind(indexer)
trait["inbredsetcode"].bind(indexer)
+ trait["species"].bind(species_indexer)
+ trait["group"].bind(group_indexer)
+ trait["description"].bind(description_indexer)
+ trait["authors"].bind(authors_indexer)
+ trait["geno_chr"].bind(peakchr_indexer)
# Convert name from integer to string.
trait["name"] = trait["name"].map(str)