diff options
author | Munyoki Kilyungi | 2024-07-03 15:15:37 +0300 |
---|---|---|
committer | BonfaceKilz | 2024-07-03 16:53:31 +0300 |
commit | 4fd5710dc04b51c0953a2063d9e934dddc13ad6c (patch) | |
tree | 914a30a1f58c2da07eec24b6b0571faff7e5fc62 /scripts | |
parent | 1dc6d62ec211b7ce1373bc4a0521037886523058 (diff) | |
download | genenetwork3-4fd5710dc04b51c0953a2063d9e934dddc13ad6c.tar.gz |
Generate a checksum for all the ttl files.
* scripts/index-genenetwork (hash_generif_graph): Rename to
hash_rdf_graph. Generate a checksum of all the turtle files inside
the ttl directory that's the basis for the GN virtuoso graph.
(create_xapian_index): Rename hash_generif_graph -> hash_rdf_graph.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/index-genenetwork | 14 |
1 file changed, 8 insertions, 6 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork index 219858c..de3edb0 100755 --- a/scripts/index-genenetwork +++ b/scripts/index-genenetwork @@ -240,10 +240,12 @@ def build_rdf_cache(sparql_uri: str, query: str, remove_common_words: bool = Fal return smaller_cache -def hash_generif_graph(generif_file: pathlib.Path) -> str: - with open(generif_file, encoding="utf-8") as f_: - data = f_.read() - return hashlib.md5(data.encode()).hexdigest() +def hash_rdf_graph(ttl_dir: pathlib.Path) -> str: + ttl_hash = hashlib.new("md5") + for ttl_file in ttl_dir.glob("*.ttl"): + with open(ttl_file, encoding="utf-8") as f_: + ttl_hash.update(f_.read().encode()) + return ttl_hash.hexdigest() # pylint: disable=invalid-name @@ -528,7 +530,7 @@ def is_data_modified(xapian_directory: str, generif = pathlib.Path("/var/lib/data/generif-metadata.ttl") generif_checksum = "-1" if generif.exists(): - generif_checksum = hash_generif_graph(generif) + generif_checksum = hash_rdf_graph(generif) if (db.get_metadata("generif-checksum").decode() == generif_checksum and db.get_metadata("checksums").decode() == checksums): sys.exit(1) @@ -586,7 +588,7 @@ def create_xapian_index(xapian_directory: str, sql_uri: str, logging.info("Writing generif checksums into index") generif = pathlib.Path("/var/lib/data/generif-metadata.ttl") if generif.exists(): - db.set_metadata("generif-checksum", hash_generif_graph(generif).encode()) + db.set_metadata("generif-checksum", hash_rdf_graph(generif).encode()) for child in combined_index.iterdir(): shutil.move(child, xapian_directory) logging.info("Index built") |