diff options
author | Arun Isaac | 2022-09-28 21:56:42 +0530 |
---|---|---|
committer | Arun Isaac | 2022-09-29 16:15:47 +0530 |
commit | d47977406a8c86e9e3afbfeb6b1779b041f7f4f6 (patch) | |
tree | 8ad9703c482627dee6c8757fd0792e770fb936a0 /wqflask/scripts | |
parent | b23c36a8c7b66296cf972ac7b3dedfe8d427690c (diff) | |
download | genenetwork2-d47977406a8c86e9e3afbfeb6b1779b041f7f4f6.tar.gz |
Index prefixed fields and add values.
* wqflask/scripts/index.py (main): Index fields with prefixes, and add
values for range queries.
Diffstat (limited to 'wqflask/scripts')
-rw-r--r-- | wqflask/scripts/index.py | 47 |
1 file changed, 46 insertions, 1 deletion
diff --git a/wqflask/scripts/index.py b/wqflask/scripts/index.py index 37736951..447cc157 100644 --- a/wqflask/scripts/index.py +++ b/wqflask/scripts/index.py @@ -32,8 +32,26 @@ def write_document(db, idterm, doctype, doc): def main(): termgenerator = xapian.TermGenerator() termgenerator.set_stemmer(xapian.Stem("en")) + indexer = partial(index_text, termgenerator) + authors_indexer = lambda text: termgenerator.index_text(text, 1, "A") + species_indexer = lambda text: termgenerator.index_text(text, 1, "XS") + group_indexer = lambda text: termgenerator.index_text(text, 1, "XG") + tissue_indexer = lambda text: termgenerator.index_text(text, 1, "XI") + description_indexer = lambda text: termgenerator.index_text(text, 1, "XD") + dataset_indexer = lambda text: termgenerator.index_text(text, 1, "XDS") + symbol_indexer = lambda text: termgenerator.index_text(text, 1, "XY") + chr_indexer = lambda text: termgenerator.index_text(text, 0, "XC") + peakchr_indexer = lambda text: termgenerator.index_text(text, 0, "XPC") + + mean_adder = lambda mean: doc.add_value(0, xapian.sortable_serialise(mean)) + peak_adder = lambda peak: doc.add_value(1, xapian.sortable_serialise(peak)) + mb_adder = lambda mb: doc.add_value(2, xapian.sortable_serialise(mb)) + peakmb_adder = lambda peakmb: doc.add_value(3, xapian.sortable_serialise(peakmb)) + additive_adder = lambda additive: doc.add_value(4, xapian.sortable_serialise(additive)) + year_adder = lambda year: doc.add_value(5, xapian.sortable_serialise(float(year))) + # FIXME: Some Max LRS values in the DB are wrongly listed as # 0.000, but shouldn't be displayed. Make them NULLs in the # database. @@ -74,6 +92,13 @@ def main(): doc = xapian.Document() termgenerator.set_document(doc) + # Add values. + trait["mean"].bind(mean_adder) + trait["lrs"].bind(peak_adder) + trait["mb"].bind(mb_adder) + trait["geno_mb"].bind(peakmb_adder) + trait["additive"].bind(additive_adder) + # Index text. 
trait["name"].bind(indexer) trait["description"].bind(indexer) @@ -82,6 +107,14 @@ def main(): trait.pop("genbankid").bind(indexer) trait.pop("unigeneid").bind(indexer) trait.pop("probe_target_description").bind(indexer) + trait["species"].bind(species_indexer) + trait["group"].bind(group_indexer) + trait["tissue"].bind(tissue_indexer) + trait["description"].bind(description_indexer) + trait["dataset"].bind(dataset_indexer) + trait["symbol"].bind(symbol_indexer) + trait["chr"].bind(chr_indexer) + trait["geno_chr"].bind(peakchr_indexer) doc.set_data(json.dumps(trait.data)) write_document(db, trait["name"].bind(lambda name: f"Q{name}"), "gene", doc) @@ -99,7 +132,7 @@ def main(): Publication.Abstract, Publication.Title, Publication.Authors AS authors, - Publication.Year AS year, + IF(Publication.Year='', 0, Publication.Year) AS year, Publication.PubMed_ID AS pubmed_id, PublishXRef.LRS as lrs, PublishXRef.additive, @@ -119,6 +152,13 @@ def main(): doc = xapian.Document() termgenerator.set_document(doc) + # Add values. + trait["mean"].bind(mean_adder) + trait["lrs"].bind(peak_adder) + trait["geno_mb"].bind(peakmb_adder) + trait["additive"].bind(additive_adder) + trait["year"].bind(year_adder) + # Index text. trait.pop("abbreviation").bind(indexer) trait["description"].bind(indexer) @@ -127,6 +167,11 @@ def main(): trait.pop("Title").bind(indexer) trait["authors"].bind(indexer) trait["inbredsetcode"].bind(indexer) + trait["species"].bind(species_indexer) + trait["group"].bind(group_indexer) + trait["description"].bind(description_indexer) + trait["authors"].bind(authors_indexer) + trait["geno_chr"].bind(peakchr_indexer) # Convert name from integer to string. trait["name"] = trait["name"].map(str) |