about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Arun Isaac, 2023-05-31 19:23:17 +0100
committer: Arun Isaac, 2023-05-31 20:28:46 +0100
commit: 78d641b1490b8b0d2b4a5437d6af90742f3011f8 (patch)
tree: 4cdc406103a333284a9408643dac6dc836fa14f3 /scripts
parent: 400cc9d9c5fa0f1db92679e7ea8d936272029e14 (diff)
download: genenetwork3-78d641b1490b8b0d2b4a5437d6af90742f3011f8.tar.gz
scripts: Write table checksums into index.
* scripts/index-genenetwork (main): Write table checksums into index.
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/index-genenetwork 13
1 files changed, 13 insertions, 0 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index b5684f2..1f649cf 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -396,6 +396,19 @@ def main(xapian_directory: str, sql_uri: str) -> None:
index_query(index_phenotypes, phenotypes_query, xapian_build_directory, sql_uri)
logging.info("Combining and compacting indices")
xapian_compact(combined_index, list(xapian_build_directory.iterdir()))
+ logging.info("Writing table checksums into index")
+ with locked_xapian_writable_database(combined_index) as db:
+ # Build a (deduplicated) set of all tables referenced in
+ # queries.
+ tables = set(clause if isinstance(clause, str) else clause.table
+ for clause in genes_query.tables + phenotypes_query.tables)
+ with database_connection(sql_uri) as conn:
+ checksums = [
+ result["Checksum"].bind(str) # type: ignore
+ for result in query_sql(conn, f"CHECKSUM TABLE {', '.join(tables)}")
+ ]
+ db.set_metadata("tables", " ".join(tables))
+ db.set_metadata("checksums", " ".join(checksums))
for child in combined_index.iterdir():
shutil.move(child, xapian_directory)
logging.info("Index built")