diff options
Diffstat (limited to 'scripts/index-genenetwork')
-rwxr-xr-x | scripts/index-genenetwork | 14 |
1 files changed, 8 insertions, 6 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 219858c..de3edb0 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -240,10 +240,12 @@ def build_rdf_cache(sparql_uri: str, query: str, remove_common_words: bool = Fal
     return smaller_cache
 
 
-def hash_generif_graph(generif_file: pathlib.Path) -> str:
-    with open(generif_file, encoding="utf-8") as f_:
-        data = f_.read()
-    return hashlib.md5(data.encode()).hexdigest()
+def hash_rdf_graph(ttl_dir: pathlib.Path) -> str:
+    ttl_hash = hashlib.new("md5")
+    for ttl_file in ttl_dir.glob("*.ttl"):
+        with open(ttl_file, encoding="utf-8") as f_:
+            ttl_hash.update(f_.read().encode())
+    return ttl_hash.hexdigest()
 
 
 # pylint: disable=invalid-name
@@ -528,7 +530,7 @@ def is_data_modified(xapian_directory: str,
         generif = pathlib.Path("/var/lib/data/generif-metadata.ttl")
         generif_checksum = "-1"
         if generif.exists():
-            generif_checksum = hash_generif_graph(generif)
+            generif_checksum = hash_rdf_graph(generif)
         if (db.get_metadata("generif-checksum").decode() == generif_checksum and
             db.get_metadata("checksums").decode() == checksums):
             sys.exit(1)
@@ -586,7 +588,7 @@ def create_xapian_index(xapian_directory: str, sql_uri: str,
             logging.info("Writing generif checksums into index")
             generif = pathlib.Path("/var/lib/data/generif-metadata.ttl")
             if generif.exists():
-                db.set_metadata("generif-checksum", hash_generif_graph(generif).encode())
+                db.set_metadata("generif-checksum", hash_rdf_graph(generif).encode())
         for child in combined_index.iterdir():
             shutil.move(child, xapian_directory)
     logging.info("Index built")