From a2eb0c2dff1ab93ff88f2c22754f4d3fc593078f Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 24 Jun 2024 19:24:33 +0000 Subject: Use dataset Name instead of FullName for indexing The Name is generally used as the identifier, while the FullName can contain spaces, which can cause problems --- scripts/index-genenetwork | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'scripts/index-genenetwork') diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork index 34c68f2..d1eaf6f 100755 --- a/scripts/index-genenetwork +++ b/scripts/index-genenetwork @@ -304,7 +304,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int, namespac trait["additive"].bind(partial(add_additive, doc)) # Index free text. - for key in ["description", "tissue", "dataset_fullname"]: + for key in ["description", "tissue", "dataset"]: trait[key].bind(index_text) trait.pop("probe_target_description").bind(index_text) for key in ["name", "symbol", "species", "group"]: @@ -316,7 +316,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int, namespac trait["species"].bind(index_species) trait["group"].bind(index_group) trait["tissue"].bind(index_tissue) - trait["dataset_fullname"].bind(index_dataset) + trait["dataset"].bind(index_dataset) trait["symbol"].bind(index_symbol) trait["chr"].bind(index_chr) trait["geno_chr"].bind(index_peakchr) @@ -352,7 +352,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int, nam trait["year"].bind(partial(add_year, doc)) # Index free text. 
- for key in ["description", "authors", "dataset_fullname"]: + for key in ["description", "authors", "dataset"]: trait[key].bind(index_text) for key in ["Abstract", "Title"]: trait.pop(key).bind(index_text) @@ -366,7 +366,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int, nam trait["group"].bind(index_group) trait["authors"].bind(index_authors) trait["geno_chr"].bind(index_peakchr) - trait["dataset_fullname"].bind(index_dataset) + trait["dataset"].bind(index_dataset) # Convert name from integer to string. trait["name"] = trait["name"].map(str) -- cgit v1.2.3