Use dataset Name instead of FullName for indexing

The Name is generally used as the identifier, while the FullName can contain spaces, which can cause problems
author: zsloan 2024-06-24 19:24:33 +0000
committer: zsloan 2024-06-24 19:24:33 +0000
commit: a2eb0c2dff1ab93ff88f2c22754f4d3fc593078f (patch)
tree: 6baf7899d081a208799e21dfcb9928e4b0abf921 /scripts
parent: de1594fc15a5b81bcf2618c6e83ad4b93280d9cf (diff)
download: genenetwork3-a2eb0c2dff1ab93ff88f2c22754f4d3fc593078f.tar.gz
1 files changed, 4 insertions, 4 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 34c68f2..d1eaf6f 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -304,7 +304,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int, namespac
             trait["additive"].bind(partial(add_additive, doc))
 
             # Index free text.
-            for key in ["description", "tissue", "dataset_fullname"]:
+            for key in ["description", "tissue", "dataset"]:
                 trait[key].bind(index_text)
             trait.pop("probe_target_description").bind(index_text)
             for key in ["name", "symbol", "species", "group"]:
@@ -316,7 +316,7 @@ def index_genes(xapian_build_directory: pathlib.Path, chunk_index: int, namespac
             trait["species"].bind(index_species)
             trait["group"].bind(index_group)
             trait["tissue"].bind(index_tissue)
-            trait["dataset_fullname"].bind(index_dataset)
+            trait["dataset"].bind(index_dataset)
             trait["symbol"].bind(index_symbol)
             trait["chr"].bind(index_chr)
             trait["geno_chr"].bind(index_peakchr)
@@ -352,7 +352,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int, nam
             trait["year"].bind(partial(add_year, doc))
 
             # Index free text.
-            for key in ["description", "authors", "dataset_fullname"]:
+            for key in ["description", "authors", "dataset"]:
                 trait[key].bind(index_text)
             for key in ["Abstract", "Title"]:
                 trait.pop(key).bind(index_text)
@@ -366,7 +366,7 @@ def index_phenotypes(xapian_build_directory: pathlib.Path, chunk_index: int, nam
             trait["group"].bind(index_group)
             trait["authors"].bind(index_authors)
             trait["geno_chr"].bind(index_peakchr)
-            trait["dataset_fullname"].bind(index_dataset)
+            trait["dataset"].bind(index_dataset)
 
             # Convert name from integer to string.
             trait["name"] = trait["name"].map(str)
author	zsloan	2024-06-24 19:24:33 +0000
committer	zsloan	2024-06-24 19:24:33 +0000
commit	a2eb0c2dff1ab93ff88f2c22754f4d3fc593078f (patch)
tree	6baf7899d081a208799e21dfcb9928e4b0abf921 /scripts
parent	de1594fc15a5b81bcf2618c6e83ad4b93280d9cf (diff)
download	genenetwork3-a2eb0c2dff1ab93ff88f2c22754f4d3fc593078f.tar.gz