about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x scripts/index-genenetwork 14
1 files changed, 8 insertions, 6 deletions
diff --git a/scripts/index-genenetwork b/scripts/index-genenetwork
index 219858c..de3edb0 100755
--- a/scripts/index-genenetwork
+++ b/scripts/index-genenetwork
@@ -240,10 +240,12 @@ def build_rdf_cache(sparql_uri: str, query: str, remove_common_words: bool = Fal
     return smaller_cache
 
 
-def hash_generif_graph(generif_file: pathlib.Path) -> str:
-    with open(generif_file, encoding="utf-8") as f_:
-        data = f_.read()
-        return hashlib.md5(data.encode()).hexdigest()
def hash_rdf_graph(ttl_dir: pathlib.Path) -> str:
    """Return the MD5 hex digest of the Turtle data found at ``ttl_dir``.

    ``ttl_dir`` may be either a directory or a single file:

    * directory -- every ``*.ttl`` file directly inside it is hashed, in
      sorted path order, so the digest is reproducible across runs and
      machines (``Path.glob`` itself yields entries in arbitrary order);
    * regular file -- that file alone is hashed.  Without this case a file
      path would match nothing under ``glob`` and every input would produce
      the same "empty" digest, hiding real changes from the caller.

    A path that does not exist, or a directory without ``*.ttl`` files,
    yields the MD5 digest of empty input.

    File contents are hashed as raw bytes, read in fixed-size chunks, so no
    text decoding or newline translation is applied and large dumps are never
    held in memory in full.

    MD5 is used purely as a change-detection checksum, not for security.
    """
    if ttl_dir.is_file():
        ttl_files = [ttl_dir]
    elif ttl_dir.is_dir():
        ttl_files = sorted(ttl_dir.glob("*.ttl"))
    else:
        ttl_files = []

    ttl_hash = hashlib.md5()
    for ttl_file in ttl_files:
        with open(ttl_file, "rb") as f_:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f_.read(65536), b""):
                ttl_hash.update(chunk)
    return ttl_hash.hexdigest()
 
 
 # pylint: disable=invalid-name
@@ -528,7 +530,7 @@ def is_data_modified(xapian_directory: str,
         generif = pathlib.Path("/var/lib/data/generif-metadata.ttl")
         generif_checksum = "-1"
         if generif.exists():
-            generif_checksum = hash_generif_graph(generif)
+            generif_checksum = hash_rdf_graph(generif)
         if (db.get_metadata("generif-checksum").decode() == generif_checksum and
             db.get_metadata("checksums").decode() == checksums):
             sys.exit(1)
@@ -586,7 +588,7 @@ def create_xapian_index(xapian_directory: str, sql_uri: str,
                 logging.info("Writing generif checksums into index")
                 generif = pathlib.Path("/var/lib/data/generif-metadata.ttl")
                 if generif.exists():
-                    db.set_metadata("generif-checksum", hash_generif_graph(generif).encode())
+                    db.set_metadata("generif-checksum", hash_rdf_graph(generif).encode())
         for child in combined_index.iterdir():
             shutil.move(child, xapian_directory)
     logging.info("Index built")