aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2024-06-24 19:24:33 +0000
committerzsloan2024-06-24 19:24:33 +0000
commita2eb0c2dff1ab93ff88f2c22754f4d3fc593078f (patch)
tree6baf7899d081a208799e21dfcb9928e4b0abf921
parentde1594fc15a5b81bcf2618c6e83ad4b93280d9cf (diff)
downloadgenenetwork3-a2eb0c2dff1ab93ff88f2c22754f4d3fc593078f.tar.gz
Use dataset Name instead of FullName for indexing
The Name is generally used as the identifier, while the FullName can contain spaces, which can cause problems.
-rwxr-xr-x scripts/index-genenetwork 8
1 files changed, 4 insertions, 4 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 34c68f2..d1eaf6f 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -304,7 +304,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int, namespac
trait["additive"].bind(partial(add_additive, doc))
# Index free text.
- for key in ["description", "tissue", "dataset_fullname"]:
+ for key in ["description", "tissue", "dataset"]:
trait[key].bind(index_text)
trait.pop("probe_target_description").bind(index_text)
for key in ["name", "symbol", "species", "group"]:
@@ -316,7 +316,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int, namespac
trait["species"].bind(index_species)
trait["group"].bind(index_group)
trait["tissue"].bind(index_tissue)
- trait["dataset_fullname"].bind(index_dataset)
+ trait["dataset"].bind(index_dataset)
trait["symbol"].bind(index_symbol)
trait["chr"].bind(index_chr)
trait["geno_chr"].bind(index_peakchr)
@@ -352,7 +352,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int, nam
trait["year"].bind(partial(add_year, doc))
# Index free text.
- for key in ["description", "authors", "dataset_fullname"]:
+ for key in ["description", "authors", "dataset"]:
trait[key].bind(index_text)
for key in ["Abstract", "Title"]:
trait.pop(key).bind(index_text)
@@ -366,7 +366,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int, nam
trait["group"].bind(index_group)
trait["authors"].bind(index_authors)
trait["geno_chr"].bind(index_peakchr)
- trait["dataset_fullname"].bind(index_dataset)
+ trait["dataset"].bind(index_dataset)
# Convert name from integer to string.
trait["name"] = trait["name"].map(str)